/* SAS Macro for Longitudinal Data Analysis: ========================================= GEE is a SAS macro for analyzing longitudinal data. This SAS IML macro uses the GEE approach of Liang and Zeger (1986) to model longitudinal data for a general class of outcome variables including gaussian, poisson, binary and gamma outcomes. The program uses an iterative procedure to estimate regression coefficients, treating the correlation among observation on the same individual as a nuisance. Final output from GEE includes: regression coefficients, naive and robust estimates of variance and z-score. Following command, with appropriate parameters, can be used to invoke the macro. All parameters have been assigned default values, so that they can be omitted if default values are acceptable. (Defaults are shown within {}). Parameters may be given in any order. %GEE ( DATA = SAS dataset, { _LAST_ } YVAR = y-variable, { Y } XVAR = x-variables, { X } ID = id-variable, { ID } LINK = link function, { 1 } VARI = mean-variance relation, { 1 } N = binomial denominator variable, {_1_} CORR = correlation structure, { 1 } M = dependence, { 1 } R = given correlation matrix, { I } BETA = initial estimate of beta, { LSE } OFFS = offset variable, { _0_ } OUT = output dataset, { _NULL_ } ITER = maximum iterations, { 20 } CRIT = convergence criterion { 0.001 } ) ; ========================================================================= Date: 7/4/89 Author: M. Rezaul Karim Department of Biostatistics The Johns Hopkins University ==========================================================================*/ %MACRO GEE ( DATA =_last_, YVAR =y, XVAR =x, ID =id, LINK =1, VARI =1, N =_1_, CORR =1, M =1, R =I, BETA =0, OFFS =_0_, OUT =_NULL_, ITER =20, CRIT =0.001); OPTION nocenter; PROC IML WORKSIZE=999; RESET noname; USE &DATA; SETIN &DATA NOBS nobs; link={ &LINK }; vari={ &VARI }; corr={ &CORR }; m={ &M }; r={ &R }; n=INT(SQRT(NROW(r)#NCOL(r))); r=SHAPE(r,n,n); beta={ &BETA }; labely={ &YVAR }; labelx={ &XVAR }`; labelo={ &OFFS }; IF labelo={'_0_'} THEN offset=0; labeln={ &N }; IF labeln={'_1_'} THEN n=1; START init; /**********************************************************/ R1={'Data File:' &DATA}; PRINT / 'Regression analysis using GEE: ( Ver - 1.25 )', '=============================',,, R1; R1={'Outcome variable:'}; R2={'Covariates:'}; PRINT labely [ROWNAME=R1], {&XVAR} [ROWNAME=R2]; R1={'Offset:'}; IF labelo^={'_0_'} THEN PRINT labelo [ROWNAME=R1]; _1={'(Identity)'}; _2={'(Logarithm)'}; _3={'(Logit)'}; _4={'(Reciprocal)'}; R1={'Link:'}; IF (link<1 | link>4) THEN PRINT link [ROWNAME=R1 FORMAT=2.0] {'(Invalid Option !!!)'}; ELSE PRINT link [ROWNAME=R1 FORMAT=2.0] _&LINK ; _1={'(Gaussian)'}; _2={'(Poisson)'}; _3={'(Binomial)'}; _4={'(Gamma)'}; R1={'Variance:'}; IF (vari<1 | vari>4) THEN PRINT vari [ROWNAME=R1 FORMAT=2.0] {'(Invalid Option !!!)'}; ELSE PRINT vari [ROWNAME=R1 FORMAT=2.0] _&VARI ; R1={'Denominator'}; IF vari=3 THEN PRINT labeln [ROWNAME=R1]; FREE _1 _2 _3 _4; R1={'Correlation:'}; IF corr=1 THEN DO; IF NCOL(r)=1 THEN PRINT corr [ROWNAME=R1 FORMAT=2.0] {'(Independent)'}; ELSE PRINT corr [ROWNAME=R1 FORMAT=2.0] {'(R given):'}, r [FORMAT=4.2]; END; R2={'(Stationary'}; IF corr=2 THEN PRINT corr [ROWNAME=R1 FORMAT=2.0] m [ROWNAME=R2 FORMAT=2.0] {'- dependent)'}; R2={'(NonStationary'}; IF corr=3 THEN PRINT corr [ROWNAME=R1 FORMAT=2.0] m [ROWNAME=R2 FORMAT=2.0] {'- dependent)'}; IF corr=4 THEN PRINT corr [ROWNAME=R1 FORMAT=2.0] {'(Exchangeable)'}; R2={'(AR -'}; IF corr=5 THEN PRINT corr [ROWNAME=R1 FORMAT=2.0] m [ROWNAME=R2 FORMAT=2.0] {')'}; IF corr=6 THEN PRINT corr [ROWNAME=R1 FORMAT=2.0] {'(Unspecified)'}; R1={'Total number of records read:'}; PRINT nobs [ROWNAME=R1 FORMAT=8.0]; p=NROW(labelx); dmean=0; imean=0; READ VAR { &ID } INTO idk; READ VAR labely INTO yvar; READ VAR labelx INTO xvar; IF labeln^={'_1_'} THEN DO; READ VAR labeln INTO n; END; IF labelo^={'_0_'} THEN DO; READ VAR labelo INTO offset; END; vsum=yvar||xvar; IF NCOL(beta)=1 THEN DO; xty=xvar`*(yvar/n-offset); xtx=xvar`*xvar; END; war=J(1,p,1); war=war#(war=xvar); k=1; i=1; ni=0; DO j=2 TO nobs; READ VAR { &ID } INTO idj POINT j; READ VAR labely INTO yvar; READ VAR labelx INTO xvar; IF labeln^={'_1_'} THEN DO; READ VAR labeln INTO n; END; IF labelo^={'_0_'} THEN DO; READ VAR labelo INTO offset; END; IF idk=idj THEN i=i+1; ELSE DO; imean=imean+vsum/i; dmean=dmean+vsum; vsum=0; ni[k]=i; k=k+1; idk=idj; i=1; ni=ni//{0}; END; war=war#(war=xvar); vsum=vsum+(yvar||xvar); IF NCOL(beta)=1 THEN DO; xty=xty+xvar`*(yvar/n-offset); xtx=xtx+xvar`*xvar; END; END; ni[k]=i; imean=(imean+vsum/i)/k; dmean=((dmean+vsum)/nobs)//imean; IF NCOL(beta)=1 THEN beta=SOLVE(xtx,xty); ELSE beta=SHAPE(beta,p,1,0); maxn=MAX(ni); minn=MIN(ni); R1={'Total number of clusters:'}; PRINT k [ROWNAME=R1 FORMAT=5.0]; R1={'Maximum and minimum cluster size:'}; R2={'and'}; PRINT maxn [ROWNAME=R1 FORMAT=5.0] minn [ROWNAME=R2 FORMAT=5.0]; R1={'Observations: ','Cluster Means:'}; C1={ &YVAR &XVAR }; PRINT 'Averages of Outcome variable and Covariates (over all)',, dmean [ROWNAME=R1 COLNAME=C1],; IF ALL(^war) THEN PRINT '*** WARNING: No intercept term in the model!'; PRINT / 'Initial estimate of regression coefficients:', labelx beta; FREE dmean i idj idk imean j vsum xtx xty xvar yvar; *show names; FINISH; /** INIT *******************************************************/ START estb; /***********************************************************/ us=J(p,1,0); m0=J(p,p,0); nx=0; DO j=1 TO k; nj=ni[j]; i=(nx+1):(nx+nj); nx=nx+nj; READ VAR labely INTO yvar POINT i; READ VAR labelx INTO xvar POINT i; IF labeln^={'_1_'} THEN DO; READ VAR labeln INTO n POINT i; END; IF labelo^={'_0_'} THEN DO; READ VAR labelo INTO offset POINT i; END; *** Calculate ui and di; lp=xvar*beta+offset; IF link=1 THEN DO; ui=lp; di=xvar; END; ELSE IF link=2 THEN DO; ui=EXP(lp); di=ui#xvar; END; ELSE IF link=3 THEN DO; ui=EXP(lp); ui=(n#ui)/(1+ui); di=(ui#(1-ui/n))#xvar; END; ELSE IF link=4 THEN DO; ui=1/(lp); di=-(ui#ui)#xvar; END; zi=di*beta+(yvar-ui); *** Calculate a*zi and a*di; IF vari=1 THEN ui=1; ELSE IF vari=2 THEN ui=1/SQRT(ui); ELSE IF vari=3 THEN ui=1/SQRT(ui#(1-ui/n)); ELSE IF vari=4 THEN ui=1/ABS(ui); di=di#ui; zi=zi#ui; vinv=INV(r[1:nj,1:nj]); us=us+di`*vinv*zi; m0=m0+di`*vinv*di; END; *** End of beta estimation loop; beta=solve(m0,us); C1={'Estimate'}; PRINT labelx beta [COLNAME=C1]; FREE di i j m0 nj nx us ui vinv xvar yvar zi; *show names; FINISH; /** ESTB *******************************************************/ START estr; /***********************************************************/ IF corr=1 THEN DO; END; /* given correlation */ ELSE IF corr=2 THEN alp=J(1,m,0); /* stationary m-dependent */ ELSE IF corr=3 THEN alp=J(maxn,maxn,0); /* non stationary m-dept */ ELSE IF corr=4 THEN alp=0; /* exchangeable */ ELSE IF corr=5 THEN alp=J(1,m,0); /* AR-m */ ELSE IF corr=6 THEN alp=J(maxn,maxn,0); /* unspecified */ sigma=0; nx=0; DO j=1 TO k; nj=ni[j]; i=(nx+1):(nx+nj); nx=nx+nj; READ VAR labely INTO yvar POINT i; READ VAR labelx INTO xvar POINT i; IF labeln^={'_1_'} THEN DO; READ VAR labeln INTO n POINT i; END; IF labelo^={'_0_'} THEN DO; READ VAR labelo INTO offset POINT i; END; lp=xvar*beta+offset; IF link=1 THEN DO; ui=lp; END; ELSE IF link=2 THEN DO; ui=EXP(lp); END; ELSE IF link=3 THEN DO; ui=EXP(lp); ui=(n#ui)/(1+ui); END; ELSE IF link=4 THEN DO; ui=1/(lp); END; ei=yvar-ui; IF vari=1 THEN ui=1; ELSE IF vari=2 THEN ui=1/SQRT(ui); ELSE IF vari=3 THEN ui=1/SQRT(ui#(1-ui/n)); ELSE IF vari=4 THEN ui=1/ABS(ui); ei=ei#ui; sigma=sigma+SSQ(ei)/nj; IF corr=1 THEN DO; END; ELSE IF corr=2 THEN DO; i=nj/(nj+1-(1:m)); alp=alp+COVLAG(ei,-m)#i; END; ELSE IF corr=3 THEN alp=alp+ei*ei`; ELSE IF corr=4 THEN DO; IF (nj>1) THEN alp=alp+(SUM(ei*ei`)-SSQ(ei))/(nj#(nj-1)); END; ELSE IF corr=5 THEN DO; i=nj/(nj+1-(1:m)); alp=alp+COVLAG(ei,-m)#i; END; ELSE IF corr=6 THEN alp=alp+ei*ei`; END; *** End of working covariance estimation loop; IF corr=1 THEN DO; END; ELSE IF corr=2 THEN DO; alp=alp/sigma; alp[1]=1; r=SHAPE(alp,1,maxn,0); r=TOEPLITZ(r); END; ELSE IF corr=3 THEN DO; r=alp/sigma; DO j=1 TO maxn; r[j,j]=1; DO i=j+m TO maxn; r[i,j]=0 ; r[j,i]=0 ; END; END; END; ELSE IF corr=4 THEN DO; alp=alp/sigma; r=J(1,maxn,alp); r[1]=1; r=TOEPLITZ(r); END; ELSE IF corr=5 THEN DO; alp=alp/sigma; alp[1]=1; r=SHAPE(alp,1,maxn,0); i=TOEPLITZ(alp[1:m-1]); alp=alp[1,2:m]*INV(i); DO i=m+1 TO maxn; DO j=1 TO m-1; r[i]=r[i]+alp[j]#r[i-j]; END; END; r=TOEPLITZ(r); END; ELSE IF corr=6 THEN DO; r=alp/sigma; DO j=1 TO maxn; r[j,j]=1; END; END; FREE alp ei i j nj nx ui xvar yvar; *show names; FINISH; /** ESTR *******************************************************/ /***********************************************************************/ /* Main Program: */ /***********************************************************************/ RUN init; IF NCOL(r)<=1 THEN r=I(maxn); START; /** Check for consistency ***************************************/ crit=1; IF corr=1 THEN DO; IF NCOL(r)=minn THEN DO; PRINT 'ERROR: Gorup size too small for m-dependent correlation.'; crit=0; END; m=m+1; END; IF corr=4 THEN DO; END; IF corr=5 THEN DO; IF m>=minn THEN DO; PRINT 'ERROR: Gorup size too small for AR-m correlation.'; crit=0; END; m=m+1; END; IF corr=6 | corr=3 THEN DO; IF maxn=minn THEN DO; END; ELSE DO; PRINT 'ERROR: Unequal gorup size.'; crit=0; END; END; FINISH; RUN; /** End for consistency check *****************************/ START; /** Main iteration **********************************************/ IF crit=0 THEN STOP; DO iter=1 TO &ITER WHILE(crit>&CRIT); R1={'===> Iteration:'}; PRINT iter [ROWNAME=R1 FORMAT=3.0]; save=beta; IF corr>1 THEN RUN estr; RUN estb; crit=MAX(ABS(1-save/beta)); END; *** End of iterations; *show names; iter=iter-1; IF iter>=&ITER THEN PRINT ' ' / {'No Convergence after'} {&ITER} [FORMAT=3.0] {'iterations.'}; ELSE PRINT ' ' / {'Convergence after'} iter [FORMAT=3.0] {'iteration(s).'}; IF maxn>10 THEN DO; save=r[1:10,1:10]; PRINT 'Working Correlation:', save; END; ELSE PRINT 'Working Correlation:', r; FREE save; crit=1; FINISH; RUN; /* End of iteration ***************************************/ START; /** Calculation of variance *************************************/ IF crit=0 THEN STOP; RUN estr; sigma=SQRT(sigma/k); m0=J(p,p,0); m1=J(p,p,0); dev=0; null={'_NULL_'}; C1={ FIT RES SRES }; IF null = {&OUT} THEN DO; END; ELSE DO; out={ 0 0 0 }; id={'12345678'}; CREATE &OUT FROM out [ROWNAME=id COLNAME=C1]; SETIN &DATA; END; nx=0; DO j=1 TO k; nj=ni[j]; i=(nx+1):(nx+nj); nx=nx+nj; READ VAR labely INTO yvar POINT i; READ VAR labelx INTO xvar POINT i; IF labeln^={'_1_'} THEN DO; READ VAR labeln INTO n POINT i; END; IF labelo^={'_0_'} THEN DO; READ VAR labelo INTO offset POINT i; END; *** Calculate ui and di; lp=xvar*beta+offset; IF link=1 THEN DO; ui=lp; di=xvar; END; ELSE IF link=2 THEN DO; ui=EXP(lp); di=ui#xvar; END; ELSE IF link=3 THEN DO; ui=EXP(lp); ui=(n#ui)/(1+ui); di=(ui#(1-ui/n))#xvar; END; ELSE IF link=4 THEN DO; ui=1/(lp); di=-(ui#ui)#xvar; END; ei=yvar-ui; dev=dev+SSQ(ei)/nj; IF null = {&OUT} THEN DO; END; ELSE out=ui||ei; *** Calculate a*ei and a*di; IF vari=1 THEN ui=1; ELSE IF vari=2 THEN ui=1/SQRT(ui); ELSE IF vari=3 THEN ui=1/SQRT(ui#(1-ui/n)); ELSE IF vari=4 THEN ui=1/ABS(ui); di=(di#ui)/sigma; ei=(ei#ui)/sigma; IF null = {&OUT} THEN DO; END; ELSE DO; out=out||ei; id=J(nj,1,CHAR(j,8,0)); SETOUT &OUT; APPEND FROM out [ROWNAME=id]; SETIN &DATA; END; /* End of output file process */ vinv=INV(r[1:nj,1:nj]); m0=m0+di`*vinv*di; i=ei`*vinv*di; m1=m1+i`*i; END; *** End of variance(beta) estimation loop; nvar=INV(m0); rvar=nvar*m1*nvar; FREE di ei i j m0 m1 nj nx ui vinv xvar yvar; *show names; FINISH; RUN; /* End of variance ****************************************/ START; /* Outputs ******************************************************/ IF crit=0 THEN STOP; sigma=sigma#sigma; dev=dev/k; ns=1/SQRT(VECDIAG(nvar)); rs=1/SQRT(VECDIAG(rvar)); rbeta=beta#rs; DO i=1 TO p; DO j=i+1 TO p; nvar[j,i]=nvar[i,j]#ns[i]#ns[j]; rvar[j,i]=rvar[i,j]#rs[i]#rs[j]; END; END; R1={'Scale parameter:'}; R2={'Mean Squared Error:'}; PRINT sigma [ROWNAME=R1], dev [ROWNAME=R2] /; PRINT 'Variance estimate (naive):',, nvar [ROWNAME=labelx COLNAME=labelx]; PRINT 'Variance estimate (robust):',, rvar [ROWNAME=labelx COLNAME=labelx], 'NOTE: Covariances are above diagonal and correlations are below' 'diagonal.',; ns=1/ns; rs=1/rs; C1={ 'Estimate' }; C2={ ' s.e.-Naive' }; C3={ ' z-Naive' }; C4={ 's.e.-Robust' }; C5={ 'z-Robust' }; PRINT / 'Estimate, s.e. and z-score:',, labelx beta [COLNAME=C1] ns [COLNAME=C2 FORMAT=11.3] rs [COLNAME=C4 FORMAT=11.3] rbeta [COLNAME=C5 FORMAT=8.2],; IF ALL(^war) THEN PRINT '*** WARNING: No intercept term in the model!'; PRINT ' ',,'(c) M. Rezaul Karim, 1989', 'Department of Biostatistics, The Johns Hopkins University'; FINISH; RUN; /* End of outputs *****************************************/ QUIT; %MEND;