/* file: gts_alr4alpha.sas */ /* old name: ~/gts/JQW1/cluster/alrtab5c.sas */ /* date: 7/8/2001 */ /* name: John Preisser */ /* project: GTS (Green Tobacco Sickness) */ /* "Detecting Patterns of Occupational Illness Clustering */ /* with Alternating Logistic Regressions Applied to */ /* Longitudinal Data" by Preisser, Arcury and Quandt */ /* Amercian Journal of Epidemiology, 2003; 57:550-2. */ /* request: Fit 4 parameter POR model using alr with */ /* SAS PROC GENMOD using option ZDATA on the repeated statement */ /* input files: ~/gts/datasets/daily2.ssd01 */ /* output files: none */ /* Note: Use ZDATA instead of writing out replicated Z- matrix */ libname dat "/home/projects/gts/datasets"; libname formats "/home/projects/gts/sas_formats"; filename zout "/home/projects/gts/JQW1/cluster/zrepdays.out"; options ls=80 ps=60; options fmtsearch=(work.format, formats.formats, fmt.format); proc format; value workty 1 = 'prime' 2 = 'prime/barn' 3 = 'top' 4 = 'barn' 5 = 'other(none)' ; run; *********************************************************; data gmd01; set dat.daily2; if (1 le cum_day le 34) then season=3; else if (35 le cum_day le 55) then season=2; else if cum_day ge 56 then season=1; temp92=temp-92; if workty in (4,5) then workty=4; /* combine 'barn & other' */ run; *********************************************************; proc freq; title 'Number of observations per day'; table cum_day; run; /* create wid */ proc sort data=gmd01; by siteid newpid cum_day; run; proc freq data=gmd01; title 'Number of workers per site'; table newpid/ out=numdays; run; /* proc print data=numdays; run; */ data numdays2(keep=newpid pidcnt); set numdays; pidcnt=count; run; proc sort data=gmd01; by newpid; run; proc sort data=numdays2; by newpid; run; data gmd1; merge gmd01 numdays2; by newpid; run; * sort by pidcnt to try to minimize size of zrep; proc sort data=gmd1; by siteid newpid; ******** by siteid descending pidcnt newpid; run; /* proc print data=gmd1(obs=500); title 'data is gmd1'; var siteid newpid pidcnt cum_day gts; run; */ * instead of newpid sort by worker according to number of observations descending; data gmd2(keep=siteid newpid period cum_day wid gts pidcnt); set gmd1; ******** by siteid descending pidcnt newpid; by siteid newpid cum_day; if first.siteid then WID=0; if first.newpid then WID+1; run; proc print data=gmd2(obs=500); title 'data is gmd2; order by worker count within sites'; var siteid newpid pidcnt cum_day gts; run; data gmd3; set gmd2; ********* by siteid descending pidcnt newpid; by siteid newpid; Y=(wid-1)*83+cum_day; /* create y=1 to 747 */ run; proc print data=gmd3(obs=500); title 'data is gmd3'; var siteid newpid cum_day wid y; run; proc freq data=gmd3; tables period; proc freq data=gmd3; tables siteid*newpid*wid*period/list; run; *********************************************************; /* create zin */ * Z-matrix for a complete cluster ; * alternatively, one could use SAS PROC IML; *********************************************************; /* create zrep */ * 83 days times 9 workers equals 747 max cls size; data zrep; keep y1 y2 z1-z4 one /* cday1 cday2 worker1 worker2 */; one=1; do m=1 to 746; do n=m+1 to 747; y1=m; y2=n; worker1=int(y1/83)+1; worker2=int(y2/83)+1; cday1 = mod(y1,83); cday2 = mod(y2,83); if cday1=0 then do; cday1=83; worker1=worker1-1; end; if cday2=0 then do; cday2=83; worker2=worker2-1; end; /* if worker1=worker2 then do; z1=1; z2=0; z3=0; end; else if cday2-cday1 le 6 then do; z1=0; z2=1; z3=0; end; else do; z1=0; z2=0; z3=1; end; */ if ((worker1=worker2) and (cday2-cday1 le 6)) then do; z1=1; z2=0; z3=0; z4=0; end; if ((worker1=worker2) and (cday2-cday1 > 6)) then do; z1=0; z2=1; z3=0; z4=0; end; if ((worker1 ne worker2) and (cday2-cday1 le 6)) then do; z1=0; z2=0; z3=1; z4=0; end; if ((worker1 ne worker2) and (cday2-cday1 > 6)) then do; z1=0; z2=0; z3=0; z4=1; end; output; end; end; run; * Merge with wid (actual data) and delete the rows not represented in actual data; proc print data=zrep(obs=500); title 'data is zrep - full'; run; proc sort data=gmd3; by y; run; data uniqy(keep=y1 y2); set gmd3; by y; if first.y; y1=y; y2=y; run; /* proc print data=uniqy; run; */ proc sort data=zrep out=zrep; by y2; run; data zrep2; merge uniqy(in=ina) zrep(in=inb); by y2; if ina and inb; run; proc sort data=zrep2; by y1; run; * this merge deletes those rows from zrep that are not represented in the data; data zrep3; merge uniqy(in=ina) zrep2(in=inb); by y1; if ina and inb; run; proc print data=zrep3(obs=500); run; * this is the 4 parameter POR model given by expression (5); proc genmod data=gmd3 descending; class siteid y; model gts= /dist=bin noscale scale=1; repeated subject=siteid / withinsubject=y LogOr=zrep zdata=zrep3 zrow=(z1-z4) ypair=(y1 y2) ecovb; run;