/* file:         gts_corrGEE.sas                                        */
/* date:         12/31/2003                                             */
/* modified:                                                            */
/* name:         John Preisser                                          */
/* project:      GTS (Green Tobacco Sickness)                           */
/* request:      run Prentice GEE macro on GTS data for                 */
/*               AJE response letter                                    */
/* input files:  ~/gts/ClusterEpipaper4/daily2.ssd01                    */
/* output files: none                                                   */
/* Note: SAS IML is used to construct the Z matrix                      */

libname dat "/home/faculty/jpreisse/gtspapers/ClusterEpipaper4";
*libname formats "/home/projects/gts/sas_formats";
%include "/home/faculty/jpreisse/gee/Zink/geecorr1.01.macro";

options ls=80 ps=60;
options fmtsearch=(work.format, formats.formats, fmt.format);

/* Value labels for the work-type codes. */
proc format;
  value workty
    1 = 'prime'
    2 = 'prime/barn'
    3 = 'top'
    4 = 'barn'
    5 = 'other(none)'
  ;
run;

*********************************************************;
/* Analysis data set: adds an intercept column (one), a season code  */
/* derived from cum_day, a centred temperature (temp - 92), and      */
/* collapses work types 4 and 5 into a single level.                 */
data xy;
  set dat.daily2;
  one = 1;
  if (1 le cum_day le 34) then season = 3;
  else if (35 le cum_day le 55) then season = 2;
  else if cum_day ge 56 then season = 1;
  temp92 = temp - 92;
  if workty in (4,5) then workty = 4;   /* combine 'barn & other' */
run;
*********************************************************;

/* Sort in place so that BY-group processing and the pair            */
/* enumeration in PROC IML see each site's rows contiguously.        */
proc sort data=xy;
  by siteid newpid cum_day;
run;

/* Cluster size per site: n = number of non-missing gts values.      */
/* NOTE(review): the IML loop below uses n as the number of ROWS per */
/* site, which is only right if gts is never missing - confirm.      */
proc summary data=xy;
  var gts;
  by siteid;
  output out = clssize n = n;
run;

/* Determine dimension of z: n*(n-1)/2 within-cluster pairs per site. */
data dimz;
  set clssize;
  nc2 = n*(n-1)/2;
run;

proc summary data=dimz;
  var nc2;
  output out = ormatrix sum = sum;
run;

/* Printed as a cross-check on the row count of z computed in IML. */
proc print data=ormatrix;
run;

/* Merge cluster size with the xy data set (one clssize row per site). */
/* The original also had a redundant "set xy;" ahead of the MERGE; it  */
/* contributed nothing to the match-merge and has been dropped.        */
data xy2;
  merge xy clssize;
  by siteid;
run;

proc iml;
  use xy2;
    read all var{gts}     into gts;
    read all var{siteid}  into siteid;
    read all var{newpid}  into newpid;
    read all var{cum_day} into cum_day;
  close xy2;

  use clssize;
    read all var{n} into n;
  close clssize;

  /* print siteid newpid cum_day gts; */

  /* Create z matrix: one row per within-cluster pair of observations. */
  /* 1st (2nd) column is 1 (0) if same worker, 0 (1) otherwise.        */

  /* Dimensions are taken from the data; they were previously          */
  /* hard-coded as k=37 clusters and q=255949 pairs.                   */
  k = nrow(n);                 /* number of clusters                   */
  q = sum(n#(n-1)/2);          /* total number of within-cluster pairs */
  z = J(q,2,0);

  n1   = n - J(k,1,1);         /* last "first member" index per cluster */
  t    = 1;                    /* next row of z to fill                 */
  sumn = 0;                    /* rows belonging to earlier clusters    */

  do i = 1 to k;
    do j = 1 to n1[i];
      /* The inner index is named m; the original reused the name k    */
      /* here, overwriting the cluster count while looping over it.    */
      do m = j+1 to n[i];
        dum = (newpid[sumn+j] = newpid[sumn+m]);
        z[t,] = dum||(1-dum);
        t = t+1;
      end;
    end;
    sumn = sumn + n[i];
  end;

  /* Preview the first rows of z (at most 500). */
  zc = z[1:min(500,q),];
  print zc;

  /* Write z to WORK.ZD with columns WORKER and CAMP. */
  C1 = {worker camp};
  create zd from z [colname=c1];
  setout zd;
  append from z;
quit;

/* The original printed a non-existent data set "zdata"; the data set */
/* written by PROC IML above is zd.                                   */
proc print data=zd(obs=50);
run;

/* One row per site with a constant column, passed to the macro as wdata. */
data w(keep=siteid one);
  set clssize;
  one = 1;
run;

title "Green Tobacco Sickness";

%geecorr(xydata=xy, yvar=gts, xvar = one, id=siteid,
         zdata=zd, zvar = worker camp,
         wdata=w, wvar = one,
         maxiter=20, epsilon=0.00001,
         printrange=YES, shrink = ALPHA);