/* file: diag102.sas

GEE WITH DIAGNOSTICS - Version 1.02

SAS/IML MACRO FOR LONGITUDINAL DATA ANALYSIS AND GEE DELETION DIAGNOSTICS 
BY JOHN PREISSER - THE UNIVERSITY OF NORTH CAROLINA-CHAPEL HILL


PURPOSE  
Fit GEE to population averaged models with option to request cluster-deletion 
and observation-deletion diagnostics, described in Preisser & Qaqish 
(Biometrika 1996:83,551-562).


VERSION NOTES
v1.00
  * Correction made 07/09/03 - Division by phi added to Cook's D
  * Minor modifications by BQ, November 2003

v1.01
  * Option to use empirical variance estimator as norm for Cook's D instead 
    of model-based estimator

v1.02
  * Added correlation structures: M-dependent (stationary and non-stationary), 
    Autoregressive-1, and Unstructured
  * Added bias-corrected standard errors as described in Mancl & DeRouen 
    (Biometrics 2001:57,126-134), made them available for output to a 
    dataset, and made than available to be used when standardizing DFBETAs 
    and Cooks D.
  * Added standardized DFBETAs to diagnostic output datasets
  * Added range checks on correlation matrix elements for binary response
    variables to ensure that bivariate predicted probabilites are non-negative
  * Many other minor modifications and code cleanining
  * Cluster-level output dataset for diagnostics maintains cluster id variable
    name and vartype for simple re-merging with original data, if desired
  * Changed convergence criteria to check for absolute or relative differences 
    in betas between iterations


PARAMETERS SPECIFIED {Default value}

%GEE  ( 
   DATA = SAS dataset,                                     { &syslast }
   YVAR = y-variable,                                      { required }
   XVAR = x-variables,                                     { required }
   ID   = id-variable,                                     { required }
   TIME = within cluster variable                          { }
   LINK = link function,                                   { required }
   VARI = mean-variance relation,                          { required }
   CORR = correlation structure,                           { required }
   N    = binomial denominator variable,                   { }
   M    = dependence,                                      { 1 }
   R    = given correlation matrix,                        { }
   SCALE= scale parameter,                                 { }
   BETA = initial estimate of beta,                        { }
   OFFS = offset variable,                                 { }
   NCOVOUT = output dataset of beta, model-based se, 
             and model-based covariance matrix,            { }
   RCOVOUT = output dataset of beta, empirical se, 
             and empirical covariance matrix,              { }
   BCOVOUT = output dataset of beta, bias-corrected se, 
             and bias-corrected covariance matrix,         { }
   OBSOUT = output dataset of observation diagnostics,     { }
   CLSOUT = output dataset of cluster diagnostics,         { }
   NORM = Cook's D and DFBETAs normed by model-based 
          (1, default), Empirical (2), or bias-corrected (3)
          variance estimator                               { 1 }
   ITER     = maximum iterations,                          { 20 }
   MONITOR  = print out iterations (YES | NO)              { NO }
    CRITTYPE = type of convergence criterion (REL | ABS)    { REL }
   CRIT     = convergence criterion                        { 1E-5 }
    BINRANGE = binary range checks enforced (Y | N)         { Y }
)


REQUIRED MACRO SPECIFICATIONS
To run the macro, the user is required to provide a SAS dataset 
(DATA) with response variable (YVAR), a list of independent variables 
(XVAR), cluster identifier variable (ID), link (LINK) and variance 
(VARI) function, and working correlation structure (CORR).  

If no dataset name is given, the last working SAS dataset is used.
Only one response variable may be given.  If an intercept term is 
desired in the model, the intercept variable must be explicitly 
included with the covariate list.  Options for LINK, VARI, and CORR 
are below.

    The following choices of link (LINK) function are available:
       1 - Identity
       2 - Logarithm
       3 - Logit
       4 - Reciprocal
    
    The following choices of variance (VARI) function are available:
       1 - Gaussian
       2 - Poisson
       3 - Binomial
       4 - Gamma
    
    The following choices of correlation (CORR) structures are available:
       1 - Independence
        2 - Stationary m-dependent
        3 - Non-stationary m-dependent
        4 - Exchangeable
        5 - Autoregressive(1)
        6 - Unstructured
        7 - User-defined, R must be given when macro is called
    
For m-dependent correlation structures, (M) should be specified.  If 
it is not specified, the default is 1-dependence.  For user-defined 
correlation structures, all elements of (R) must be given in one string
without commas.  The macro creates the square matrix.


OPTIONAL SPECIFICATIONS
A within-cluster ordering variable (TIME) can be specified if desired. 
The possible values of this variable must be positive consecutive 
integers starting with 1 or the macro will not work correctly.  If a time 
variable is specified, the data will be pre-sorted by cluster ID and time.  
Otherwise, the data will be used as ordered in the dataset when the macro
is called.

A denominator (N) is required for binomial data, but if not specified is
assumed to equal 1.  Scale is assumed to equal 1 for binary data, but 
is otherwise estimated.  Optionally, it may be set to equal 1 (or some 
other value) with the (SCALE) option.  [Note that the scale parameter is
assumed to be constant across all observations.]  The (BETA) option may be 
used to specify starting values for the regression coefficient estimates, 
otherwise GLiM estimates are used as starting values.  The offset option 
(OFFS) specifies a SAS variable containing offsets (these are used for 
example in poisson regression with unequal exposure periods).

The user can control the maximum number of iterations allowed (ITER), 
the convergence criteria (CRIT), and can print out details of each 
iteration (MONITOR).  Convergence is determined by the magnitude of the
maximum absolute or relative (default) change in the betas between 
iterations.  Checking for absolute or relative changes can be set by 
the user (CRITTYPE).

There are a number of output datasets that can be requested by the user.  
For a dataset that contains beta estimates, standard errors, and the
covariance matrix, enter an output dataset name for any or all of the 
following options:

    For the model-based (naive) SEs and covariance matrix         (NCOVOUT)
    For the empirical sandwich (robust) SEs and covariance matrix (RCOVOUT)
    For the bias-corrected SEs and covariance matrix              (BCOVOUT)

For datasets that contain regression diagnostics, including Cooks distance,
DFBETA, DFBETAS (standardized), and leverage, enter an output dataset name 
for either or both of the following options:

    For cluster-level diagnostics     (CLSOUT)
    For observation-level diagnostics (OBSOUT)
    
Cooks distance and DFBETAS are standardized by either the model-based, 
empirical, or bias-corrected variance estimators.  Use the (NORM) option to 
designate which to use: Model-based = 1 (default), Empirical = 2, Bias-
corrected = 3.

As noted in Prentice (Biometrics 1988:44, 1033-1048), among other places, 
there are restrictions placed on the range of values that the correlation 
coefficients in R may take for binary response data.  An option has been 
added to this macro (BINRANGE, when set to Y) to allow the user to enforce 
these ranges, which have the effect of ensuring non-negative joint 
probabilities for all within-cluster pairs of observations.


REGRESSION DIAGNOSTICS
This macro provides computational formulae for case-deletion regression
diagnostics (Preisser & Qaqish, 1996).  These diagnostics are
generalizations of Cook's distance, DFBETA and leverage for linear
regression, and their counterparts for generalized linear models.  They
are an approximation to the difference in the estimated regression
coefficients that one would obtain upon deleting either one observation
or one cluster.  The diagnostics are sometimes called "one-step"
diagnostics because they are equal to the procedure that upon
convergence of the iteratively reweighted least squares algorithm to the
GEE solution, applies one more iteration after deletion of an
observation (or cluster).  The difference in the regression coefficients
one would obtain from such a procedure is equivalent to the value of the
diagnostic.  Because, however, of computational formula for the
diagnostics, no additional iterations are required in the computing
algorithm to obtain the full set of observation-deletion and
cluster-deletion diagnostics.

Diagnostics are not provided automatically by the software, but may be
requested with optional statements declared in the macro call.  To
request observation-deletion diagnostics, one set for each observation,
use the statement OBSOUT.  For example, OBSOUT=infobs will create a sas
dataset "infobs" that will contain the influence diagnostics for the
observations, including cook's distance, DFBETA, DFBETAS (standardized),
observation leverage, cluster size of the cluster to which the observation 
belongs, fitted value, raw residual, and standardized residual (raw 
residual divided by the variance function).  The statement CLSOUT is used 
to obtain cluster-deletion diagnostics.  For example, CLSOUT=infcls will 
create a sas dataset "infcls" that will contain the influence diagnostics 
for the clusters, including cook's distance, DFBETA, DFBETAS (standardized), 
cluster leverage, cluster size, and a quadratic summary of the standardized 
residual vector of the cluster (the usefulness of this last statistic is 
yet to be investigated).

Unstandardized and standardized DFBETAs are produced by the macro.  In the
output dataset, the variables containing the unstandardized values take the
name of the covariate with DFBETA_ as the prefix; the variables containing 
the standardized values take the name of the covariate with DFBETAS_ as the 
prefix.  While these prefixes are long, they allow differentiation from the
original covariates if the diagnostic datasets are merged back onto the raw
data.  

The user may compute standardized DFBETAs using a different norm--either 
model-based, empirical sandwich, or bias-corrected standard errors--by first 
requesting an output dataset using the NCOVOUT, RCOVOUT, of BCOVOUT options, 
respectively.  These output data sets are also useful for constructing 
contrasts and hypothesis tests, for example, using SAS/IML.

OUTPUT DIAGNOSTICS DATASETS
The observation-level dataset specified with the OBSOUT option will
contain the following variables:
    I                   Sequential cluster number 
    IJ                  Sequential observation number
    NI                  # of records in the cluster
    FIT                 Predicted value for obs
    RES                 Unstandardized residual
    SRES                Standardized residual
    QWOBS               Leverage: Diagonal element of H matrix for obs
    COOKDOBS            Cooks D
    DFBETA_<xvars>      Unstandardized DFBETA values
    DFBETAS_<xvars>     Standardized DFBETA values

The cluster-level dataset specified with the CLSOUT options will
contain the following variables:
    I                   Sequential cluster number
    NI                  # of records in the cluster
    TRQWCLS             Leverage: Trace of H matrix for cluster
    COOKDCLS            Cooks D
    GCLS                Scalar summary of the residual vector Ei for cluster,
                        tr(Ei)[var(Ei)]^{-1}Ei (similar to MCLS_i on p557 of 
                        Preisser & Qaqish, 1996, but without the Hi and p)
    DFBETA_<xvars>      Unstandardized DFBETA values
    DFBETAS_<xvars>     Standardized DFBETA values

***********************************************************************/

%macro gee( DATA = &syslast,
            YVAR = ,
            XVAR = ,
            ID   = ,
            TIME = ,
            LINK = ,
            VARI = ,
            CORR = ,
            N    = ,
            M    = 1,
            R    = ,
            SCALE= ,
            BETA = ,
            OFFS = ,
            NCOVOUT = ,
            RCOVOUT = ,
            BCOVOUT = ,
            OBSOUT  = ,
            CLSOUT  = ,
            NORM = 1,
            ITER = 50,
            MONITOR = NO,
                CRITTYPE = REL,
            CRIT = 1e-5,
                BINRANGE = Y
);

options nocenter;

*****************************************************************;

/* If time variable is specified then pre-sort by id and 
   time before importing to IML */

%if %length(&TIME)>0 %then %do;
    proc sort data=&DATA;
        by &ID &TIME;
    run;
%end;

*****************************************************************;

proc iml worksize = 30000 symsize = 1000;

reset noprint noname fuzz;

*****************************************************************;

/* Define link function */

start linkid(lp, den, offset); 

    %IF &link = 1 %THEN %DO;
        %IF (%length(&offs) = 0) %THEN %DO;
            return(lp); 
        %END;
        %ELSE %DO;
            return(lp+offset); 
        %END;
    %END;
    
    %ELSE %IF &link = 2 %THEN %DO;
        %IF (%length(&offs) = 0) %THEN %DO;
            return(exp(lp)); 
        %END;
        %ELSE %DO;
            return(exp(lp+offset)); 
        %END;
    %END;
    
    %ELSE %IF &link = 3 %THEN %DO;
        %IF (%length(&offs) = 0) %THEN %DO;
            ui = exp(lp); 
            return((den#ui)/(1+ui)); 
        %END;
        %ELSE %DO;
            ui = exp(lp+offset); 
            return((den#ui)/(1+ui)); 
        %END;
    %END;
    
    %ELSE %IF &link = 4 %THEN %DO;
        %IF (%length(&offs) = 0) %THEN %DO;
            return(1/lp);  
        %END;
        %ELSE %DO;
            return(1/(lp+offset));  
        %END;
    %END;

finish; /* linkid */

*****************************************************************;

/* Define derivative */

start deri(ui, den, nobs); 

    %IF &link = 1 %THEN %DO;
        return(J(nobs, 1, 1)); 
    %END;
    
    %ELSE %IF &link = 2 %THEN %DO;
        return(ui);  
    %END;
    
    %ELSE %IF &link = 3 %THEN %DO;
        return(ui#(1-ui/den));  
    %END;
    
    %ELSE %IF &link = 4 %THEN %DO;
        return(-(ui#ui));  
    %END;

finish; /* deri */

*****************************************************************;

/* Define variance function */

start varfun(ui, den, nobs);

    %IF &vari = 1 %THEN %DO;
        return(J(nobs, 1, 1));
    %END;
    %ELSE %IF &vari = 2 %THEN %DO;
        return(ui);
    %END;
    %ELSE %IF &vari = 3 %THEN %DO;
        return(ui#(1-ui/den));
    %END;
    %ELSE %IF &vari = 4 %THEN %DO;
        return(ui#ui);
    %END;

finish; /* varfun */

*****************************************************************;

/* Define deviance function */

start devi(ui, den, resid, y);

    %IF &vari = 1 %THEN %DO;
        return(sum((resid)##2));
    %END;
    %ELSE %IF &vari = 2 %THEN %DO;
        return(2#sum(y#log((y+.0001)/ui)-resid));
    %END;
    %ELSE %IF &vari = 3 %THEN %DO;
        abino = y#log((y+.0001)/ui);
        bbino = (den-y)#log((den-y+.0001)/(den-ui));
        return(2#sum(abino+bbino));
    %END;
    %ELSE %IF &vari = 4 %THEN %DO;
        agam = -log((y+.0001)/ui);
        bgam = resid/ui;
        return(2#sum(agam+bgam));
    %END;

finish; /* devi */

*****************************************************************;

/* Define (A*D) inverse vector */

start canon(vu, di);

    %IF &link = &vari %THEN %DO;
        return(sqrt(vu));
    %END;
    %ELSE %DO;
        return((1/sqrt(vu))#di);
    %END;

finish; /* canon */

*****************************************************************;

/* Define scale parameter */

start scale(pearchi, nobs, p);

    %IF (%length(&scale) = 0) %THEN %DO;
        return(pearchi/(nobs-p)); 
    %END;
    %ELSE %DO;
        return(&scale);
    %END;
    
finish; /* scale */

*****************************************************************;

/* Creates R matrix for requested correlation structure */

start correl(R, K, ni, rit, phi, T, time, m, minn, maxn, p, oldr);

    %IF &corr=1 | &corr=7 %THEN %DO; /* Independent or given */
        r = oldr;
    %END;

    %ELSE %IF &corr=2 %THEN %DO;  /* stationary M-dependent */
        newm = m + 1;
        negm = -1 * newm;

        nx=0;
        alp=J(1, newm, 0);
        bot=J(1, newm, 0);
        do h=1 to K;
            nj=ni[h];
            if nj>1 then do;
                ej=rit[nx+1:nx+nj];
                if (%length(&TIME)=0 & minn>m) | (minn=maxn & maxn=T) then do;
                    alp=alp+covlag(ej, negm)#nj;
                    bot=bot+(nj+1-(1:newm));
                end;
                else do;
                    if %length(&TIME)=0 then do;
                        fullej=shape(ej, T, 1, 0);
                        countj = j(nrow(ej), 1, 1);
                        fullcountj = shape(countj, T, 1, 0);
                    end;
                    else do;
                        timej=time[nx+1:nx+nj];
                        fullej=j(T, 1, 0);
                        fullej[timej]=ej;
                        fullcountj = j(T, 1, 0);
                        fullcountj[timej]=1;
                    end;
                    alp=alp+covlag(fullej, negm)#T;
                    bot=bot+covlag(fullcountj, negm)#T;
                end;
            end;
            nx=nx+nj;
        end;

        alp=alp/(phi#(bot-p));
        alp=shape(alp, 1, T, 0);
        alp[1] = 1;

        /* Check for out-of-range alpha */

        if m = 1 then do;
            if abs(alp[2]) >= -1 / (2 # cos(maxn # 3.14159 / (maxn + 1))) then do;
                alp[2] = -sign(alp[2]) / (2 # cos((maxn + 1) # 3.14159 / (maxn + 2)));
                print, 'Estimated alpha is out of range and had been set to ' alp[2];
            end;
        end;
        else do i = 2 to newm;
            if alp[i] > 1 then do;
                alp[i] = .99;
                print "An estimated element of R is out of range and has been set to .99";
            end;
        end;

        R=toeplitz(alp);    
    %END;

    %ELSE %IF &corr=4 %THEN %DO; /* exchangeable correlation */
        nx=0;
        top=0;
        bot=0;
        do h=1 to K;
            nj=ni[h];
            ej=rit[nx+1:nx+nj];
            top=top + (sum(ej*ej`)-ssq(ej))/2;
            bot=bot+nj#(nj-1)/2;
            nx=nx+nj;
        end;
        alp=(top/(bot-p))/phi;

        * Range checks on alpha;
        if alp<= 1/(1-maxn) then do;
            alp = .99/(1-maxn);
            print, 'Estimated alpha is out of range and had been set to ' alp;
        end;
        else if alp >= 1 then do;
            alp = .99;
            print, 'Estimated alpha is out of range and had been set to .99';
        end;

        r = xch(T, alp);
    %END;

    %ELSE %IF &corr=5 %THEN %DO; /* AR-1 */
        nx=0;
        alp=J(1, 2, 0);
        bot=J(1, 2, 0);
        do h=1 to k;
            nj=ni[h];
            if nj>1 then do;
                ej=rit[nx+1:nx+nj];
                if %length(&TIME)=0 | (minn=maxn & maxn=T) then do;
                    alp=alp+covlag(ej, -2)#nj;
                    bot=bot+(nj+1-(1:2));
                end;
                else do;
                    timej=time[nx+1:nx+nj];
                    fullej=j(T, 1, 0);
                    fullej[timej]=ej;
                    fullcountj = j(T, 1, 0);
                    fullcountj[timej]=1;
                    alp=alp+covlag(fullej, -2)#T;
                    bot=bot+covlag(fullcountj, -2)#T;
                end;
            end;
            nx=nx+nj;
        end;

        alp=alp/(phi#(bot-p));
        alp=shape(alp, 1, T, 0);

        /* Check for out-of-range alpha */
        if alp[2] > 1 then do;
            alp[2] = .99;
            print "Estimated alpha is out of range and has been set to .99";
        end;

        do q=1 to T;
            alp[q]=alp[2]##(q-1);
        end;

        R=toeplitz(alp);    
    %END;

    %ELSE %IF (&corr=3 OR &corr=6) %THEN %DO; /* non-stationary M-dependent and unstructured correlation */ 
        nx=0;
        alp=J(T, T, 0);
        if minn = maxn & maxn = t then do;
            do h=1 to K;
                nj=ni[h];
                ej=rit[nx+1:nx+nj];
                ej=shape(ej, T, 1, 0);
                alp=alp+ej*ej`;
                nx=nx+nj;
            end;

            r=alp/(phi#(K-p));
        end;
        else do;
            bot=J(T, T, 0);
            do h=1 to K;
                nj=ni[h];
                ej=rit[nx+1:nx+nj];

                if %length(&TIME)=0 then do;
                    fullej=shape(ej, T, 1, 0);
                    countj = j(nrow(ej), 1, 1);
                    fullcountj = shape(countj, T, 1, 0);
                end;
                else do;
                    timej=time[nx+1:nx+nj];
                    fullej=j(T, 1, 0);
                    fullej[timej]=ej;
                    fullcountj = j(T, 1, 0);
                    fullcountj[timej]=1;
                end;

                alp=alp + fullej * fullej`;
                bot=bot + fullcountj * fullcountj`;
                nx=nx+nj;
            end;
        r = alp/(phi#(bot-p));
        end;

        /* Set R diagonal to 1 */
        do j=1 to T; 
            r[j, j]=1;
                if &corr=3 then do; /* non-stationary M-dependent, 
                                   set R to 0 where |i-j| > M */ 
                    do i=j+m+1 to T;
                        r[i, j]=0; 
                        r[j, i]=0;
                    end;
                end;
        end;  

        /* Check for out-of-range values */
        do i = 1 to (T-1);
            do j = i+1 to T;
                if r[i, j] > 1 then do;
                    r[i, j] = .99;              
                    r[j, i] = .99;
                    print 'An estimated element of R is out of range and has been set to .99';
                end;
            end;
        end;
        
    %END;

finish; /* correl */

*******************************************************************;

/* Get and print data info */

start info;

    L_1 = {'(Identity)'};
    L_2 = {'(Logarithm)'};
    L_3 = {'(Logit)'};
    L_4 = {'(Reciprocal)'};

    V_1 = {'(Gaussian)'};
    V_2 = {'(Poisson)'};
    V_3 = {'(Binomial)'};
    V_4 = {'(Gamma)'};

    C_1 = {'(Independent)'};
    C_2 = {"(Stationary &M-dependent)"};
    C_3 = {"(Non-stationary &M-dependent)"};
    C_4 = {'(Exchangeable)'};
    C_5 = {'(Autoregressive-1)'};
    C_6 = {'(Unstructured)'};
    C_7 = {'(User-defined)'};

    R1 = {'  Data File:'};
    R2 = {'    Outcome:'};
    R3 = {' Covariates:'};
    R4 = {'     Offset:'};

    R5 = {'       Link:'};
    R6 = {'   Variance:'};
    R7 = {'Denominator:'};
    R8 = {'Correlation:'};

    PRINT / 
        'GEE with deletion diagnostics ( Ver - 1.02 )',
        '============================================',,
        {"&DATA"} [ROWNAME = R1],
        {&YVAR} [ROWNAME = R2],
        {&XVAR} [ROWNAME = R3],
        %IF (%length(&OFFS)^= 0) %THEN %DO;
            {&OFFS} [ROWNAME = R4],
        %END;
        ,
        &LINK [ROWNAME = R5 FORMAT = 1.0] L_&LINK,  
        &VARI [ROWNAME = R6 FORMAT = 1.0] V_&VARI,
        %IF (%length(&N)^= 0) %THEN %DO;
            {&N} [ROWNAME = R7],
        %END;
        &CORR [ROWNAME = R8 FORMAT = 1.0] C_&CORR
        ;
    
finish; /* info */

*****************************************************************;

/* Describe clustered data */

start desc(ni, k, maxn, minn, nobs);

    use &DATA NOBS nobs;
    summary var {&YVAR &XVAR } class {&ID} stat {mean} opt {noprint save};
    close &DATA;

    k = nrow(_nobs_);
    maxn = MAX(_nobs_);
    minn = MIN(_nobs_);
    ni = _nobs_;

    R1 = {'    Total number of records read:'};
    R2 = {'        Total number of clusters:'};
    R3 = {'Minimum and maximum cluster size:'};
    R4 = {'and'};

    print
        nobs [ROWNAME = R1], 
        k    [ROWNAME = R2], 
        minn [ROWNAME = R3]
        maxn [ROWNAME = R4];

finish; /* desc */


*****************************************************************;

/* Get initial beta estimates */

start firstb(beta, xwx, ad, yvar, xvar, den, p, labelx, nobs, offset);

    %IF &VARI = 3|&link = 3 %THEN %DO;
        ui = choose(mod(yvar, 1) = 0, (1-yvar)#0.1 + yvar#0.9, yvar);
    %END;
    %ELSE %IF &VARI = 2|&link = 2 %THEN %DO; 
        ui = choose(yvar<= 0, .01, yvar); 
    %END;
    %ELSE %IF &VARI = 4|&link = 4 %THEN %DO; 
        ui = choose(yvar = 0, .01, yvar); 
    %END;
    %ELSE %DO; 
        ui = yvar; 
    %END;

    %IF &link = 1 %THEN %DO; 
        lp = ui; 
    %END;
    %ELSE %IF &link = 2 %THEN %DO; 
        lp = log(ui); 
    %END;
    %ELSE %IF &link = 3 %THEN %DO; 
        lp = log(ui/(den-ui)); 
    %END;
    %ELSE %IF &link = 4 %THEN %DO; 
        lp = 1/ui; 
    %END;

    vu = varfun(ui, den, nobs);

    %IF (%length(&beta) = 0) %THEN %DO;
        run glim(beta, xwx, ad, yvar, xvar, den, offset, lp, ui, vu, nobs);
    %END;
    %ELSE %DO;
        beta = { &BETA };
        beta = SHAPE(beta, p, 1, 0);
        di = deri(ui, den, nobs);
        ad = canon(vu, di);
        adx = xvar#ad#ad;
        xwx = xvar`*adx;
    %END;

    %IF %upcase((&monitor) = yes) %THEN %DO;
        PRINT / 'Initial estimate of regression coefficients:', labelx beta;
    %END;

finish; /* firstb */


*****************************************************************;

/* Set initial R depending on CORR */

start setr0(r0, ok, T); 

    %IF &CORR = 1 %THEN %DO;
        r0 = I(T);
    %END;
    %ELSE %IF &CORR = 7 %THEN %DO;
        %IF %length(&R) = 0 %THEN %DO;
            print "ERROR: R must be given when CORR = 7 (User-defined)"; 
            ok = 0;
        %END;
        %ELSE %DO;
            r = { &R };
            a = sqrt(ncol(r));
            b = int(a);

            if a^= b then do;
                print "ERROR: Given correlation matrix not square"; 
                ok = 0; 
            end;
            else do;
                r0 = shape(r, a, a);
                tr0 = r0`;
                
                if any(r0^= tr0) then do;
                    print "ERROR: Given correlation matrix not symmetric"; 
                    ok = 0; 
                end;
                else if ncol(R0)^= T then do;
                    print "ERROR: Dimension of given correlation matrix is not compatible with data";
                    ok = 0; 
                end;
                else do; 
                end;
            end;
        %END;
    %END;
    %ELSE %DO; 
        r0 = xch (T, 0.7); 
    %END;

finish; /* setr0 */


*****************************************************************;

/* Initial loop -- Lots of validity/consistency checks,
   also calls for data info and cluster descriptions  */

start init;

    ok = 1;

    %IF %length(&YVAR) = 0 %THEN %DO;
        print "ERROR: Y variable has not been specified";  
        ok = 0;
    %END;
    %ELSE %DO;
        if ncol({&YVAR}) > 1 then do;
            print "ERROR: Multiple Y variables specified";
            ok = 0;
        end;
    %END;

    %IF %length(&XVAR) = 0 %THEN %DO;
        print "ERROR: Covariates have not been specified";  
        ok = 0;
    %END;

    %IF %length(&BETA) > 0 %THEN %DO;
        if ncol({&BETA}) ^= ncol({&XVAR}) then do;
            print "ERROR: Incorrect number of initial betas given";
            ok = 0;
        end;
    %END;

    %IF %length(&ID) = 0 %THEN %DO;
        print "ERROR: Cluster ID has not been specified";  
        ok = 0;
    %END;

    %IF (&link<1 | &link>4) %THEN %DO;
        print "ERROR: Please select valid link function";
        ok = 0;
    %END;
    
    %IF (&vari<1 | &vari>4) %THEN %DO;
        print "ERROR: Please select valid variance function";
        ok = 0;
    %END;

    %IF (&corr<1 | &corr>7) %THEN %DO;
        print "ERROR: Please select valid correlation structure";
        ok = 0;
    %END;

    if ok = 0 then return; 

    run info;

    run desc(ni, k, maxn, minn, nobs);

    %IF (%length(&TIME) = 0) %THEN %DO;
        time = 0; 
        T = maxn;
    %END;
    %ELSE %DO;
        use &DATA; 
        read all var {&TIME} into time;
        T = ncol(unique(time)); 
    %END;

    %IF &CORR = 2|&CORR = 3 %THEN %DO;
        if T<= &m then do;
            print "ERROR: Value of M for dependence is too large"; 
            ok = 0; 
        end;
    %END;


    run setr0(r0, ok, T);

finish; /* init */

*****************************************************************;

/* Inverts symmetric positive definite matrix using 
   generalized inverse of the Cholesky decomposition */

start findinv(a);
 
    b = half(a);   /* intentional: will bomb if A is not +ve semidefinite */
    c = ginv(b);
    ainv = c*c`;
    return(ainv);

finish; /* findinv */

*****************************************************************;

/* Runs simple generalized linear model */

start glim(newb, xwx, w, y, x, den, offset, lp, ui, vu, nobs);

    /* values of linear predictor (lp), mean (ui) and variance
        function (vu) were computed in estr module */

    di = deri(ui, den, nobs);
    z = lp+(1/di)#(y-ui);
    w = canon(vu, di);
    wx = x#w#w;
    wz = z#w#w;
    xwx = x`*wx;
    xwz = x`*wz;
    inxwx = findinv(xwx);
    newb = inxwx*xwz;

finish; /* glim */

*****************************************************************;

start estb(newb, F, ad, y, x, den, time, R, ni, K, nobs, p, lp, ui, vu);

    /* values of linear predictor (lp), mean (ui) and variance
        function (vu) were computed in estphi  module */

    di = deri(ui, den, nobs);
    z = lp+(1/di)#(y-ui);
    ad = canon(vu, di);
    nx = 0;
    F = J(p, p, 0);
    G = J(p, 1, 0);
    do h = 1 to K;
        nj = ni[h];
        if nj = 1 then rj = 1;
        %IF (%length(&TIME) = 0) %THEN %DO;
            else if time = 0 then rj = invcorr(R[1:nj, 1:nj]);
        %END;
        %ELSE %DO;
            else do;
                timej = time[nx+1:nx+nj];
                rj = invcorr(R[timej, timej]);
            end;
        %END;
        xj = x[nx+1:nx+nj, ];
        zj = z[nx+1:nx+nj];
        adj = ad[nx+1:nx+nj];
        adxj = xj#adj;
        adzj = zj#adj;
        xwxj = adxj`*rj*adxj;
        xwzj = adxj`*rj*adzj;
        F = F+XWXj;
        G = G+XWZj;
        nx = nx+nj;
    end;
    
    newb = solve(F, G);

finish; /* estb */

*****************************************************************;

/* Check the min/max correlation allowed based on mean vector 
   within each cluster.  Fix the calculated correlation structure 
   to conform if needed. */

start checkcorrel(R, p, T, k, minn, maxn, ni, time, m);

    q = 1 - p;

    nx = 0;

    mincorr = J(T, T, -1);
    maxcorr = J(T, T, 1);

    do h = 1 to K;
        nj = ni[h];

        minj = J(T, T, -1);
        maxj = J(T, T, 1);

        pj = p[nx+1:nx+nj];
        qj = q[nx+1:nx+nj];

        if %length(&TIME)=0 then 
            timej = (1:T)[1:nj];
        else
            timej = time[nx+1:nx+nj];
            
        do i = 1 to (nrow(timej) - 1);
            do j = i + 1 to nrow(timej);
                minj[timej[i], timej[j]] = max(
                    -sqrt(pj[i] * pj[j] / (qj[i] * qj[j])),
                    -sqrt(qj[i] * qj[j] / (pj[i] * pj[j]))
                    );

                maxj[timej[i], timej[j]] = min(
                    sqrt(pj[i] * qj[j] / (qj[i] * pj[j])),
                    sqrt(qj[i] * pj[j] / (pj[i] * qj[j]))
                    );
            end;
        end;

        mincorr = mincorr<>minj;
        maxcorr = maxcorr><maxj;

        nx=nx+nj;
    end;

    /* Check for out-of-bounds values and fix accordingly */

    /* Check minimums */
    if r = mincorr<>r then do;
    end;
    else do;
        print "** Range violations on minimum bivariate corr allowable **",
            "** Correlation structure adjusted to conform            **";

        %if &corr = 3 | &corr = 6 | &corr = 7 %then %do;  /* Elementwise replacement for Given, N-S MDep, Unstr */
            do i = 1 to T-1;
                do j = i+1 to T;
                    mincorr[j, i] = mincorr[i, j];
                end;
            end;
            r = mincorr<>r;
        %end;
        %else %if &corr = 4 %then %do; /* Exchangeable -- use max as new alpha */
            alp = max(mincorr);
            r = xch(T, alp);
        %end;
        %else %if &corr = 2 %then %do; /* Stationary MDep */
            r = mincorr<>r;
            alp = J(1, T, 0);
            alp[1] = 1;
            do i = 1 to M;
                alp[i+1]=max(vecdiag(r[1:t-i, i+1:t]));
            end;
            r = toeplitz(alp);
        %end;
        %else %if &corr = 5 %then %do; /* AR-1 -- No direct way to fix, so shrink until it works */ 
            /* Get current R alpha vector */
            alp = r[1,];

            /* Get mincorr R alpha vector */
            minr = mincorr<>r;
            minalp = J(1, T, 0);
            minalp[1] = 1;
            do i = 1 to M;
                minalp[i+1]=max(vecdiag(r[1:t-i, i+1:t]));
            end;

            /* Shrink alp until it works with minalp */
            factor = .95;
            shrunk = 0;

            do until (shrunk);
                do i = 2 to T;
                    alp[i] = alp[i] * (factor ** (i-1));
                end;

                if abs(alp) = abs(alp)><abs(minalp) then
                    shrunk = 1;
            end;

            r = toeplitz(alp);
        %end;
    end;

    /* Check maximums */
    if r = maxcorr><r then do;
    end;
    else do;
        print "** Range violations on maximum bivariate corr allowable **",
            "** Correlation structure adjusted to conform  **";

        %if &corr = 3 | &corr = 6 | &corr = 7 %then %do;  /* Elementwise replacement for Given, N-S MDep, Unstr */
            do i = 1 to T-1;
                do j = i+1 to T;
                    maxcorr[j, i] = maxcorr[i, j];
                end;
            end;
            r = maxcorr><r;
        %end;
        %else %if &corr = 4 %then %do; /* Exchangeable -- use min as new alpha */
            alp = min(maxcorr);
            r = xch(T, alp);
        %end;
        %else %if &corr = 2 %then %do; /* Stationary MDep */
            r = maxcorr><r;
            alp = J(1, T, 0);
            alp[1] = 1;
            do i = 1 to M;
                alp[i+1]=min(vecdiag(r[1:t-i, i+1:t]));
            end;
            r = toeplitz(alp);
        %end;
        %else %if &corr = 5 %then %do; /* AR-1 */ 
            r = maxcorr><r;
            base = J(1, T, 0);
            base[1] = 1;
            do i = 1 to T-1;
                base[i+1]=min(vecdiag(r[1:t-i, i+1:t]))**(1/i);
            end;

            alp = J(1, T, 0);
            alp[2] = min(base);
            do q=1 to T;
                alp[q]=alp[2]##(q-1);
            end;

            r = toeplitz(alp);
        %end;
    end;

finish;  /* checkcorrel */

*****************************************************************;

/* Estimate correlation structure and scale coefficient */

start estrphi(R, phi, pearchi, devi, lp, ui, vu, oldb, oldr, F, ad, y, x, 
              den, offset, nobs, p, ni, K, T, time, minn, maxn, m);

    lp = x*oldb;
    ui = linkid(lp, den, offset);
    resid = y-ui;
    vu = varfun(ui, den, nobs);
    rit = resid/sqrt(vu);  /* standardized residuals: input to estr */
    pearchi = ssq(rit);    /* psuedo pearson chi-square */
    
    devi = devi(ui, den, resid, y);      /* pseudo deviance  */

    phi = scale(pearchi, nobs, p);

    run correl(R, K, ni, rit, phi, T, time, m, minn, maxn, p, oldr);

    %IF %upcase((&monitor) = yes) %THEN %DO;
        %if &CORR = 4 %then %do;
            outalpha = r[1,2];
        %end;
        
        scale = sqrt(phi);
        print / 
            'Scale is: ' scale,
            'Pseudo deviance: ' devi,
            'Pseudo chi-square: ' pearchi,
            %if &CORR = 1 %then %do;
                'Working correlation matrix: Identity';
            %end;
            %else %if &CORR = 4 %then %do;
                'Working exchangeable correlation: ' outalpha;
            %end;
            %else %do;
                'Working correlation matrix: ',, R;
            %end;
    %end;

finish; /* estrphi */

*****************************************************************;

/* Function to return an n x n exchangeable correlation 
   matrix -- given rho as the correlation */

start xch(n, rho);
    
    auto  = {1} ||  j(1, n-1, rho);
    return (toeplitz(auto));

finish; /* xch */

*****************************************************************;

/* Function to return the inverse of an n x n exchangeable
   correlation matrix -- given rho as the correlation */

start xchinv(n, rho);

    c = 1/(1-rho);
    d = -rho/(1+rho*(n-1));
    auto  = (c + c*d) ||  j(1, n-1, c*d);
    return (toeplitz(auto));

finish; /* xchinv */

*****************************************************************;

/* Finds the inverse of the correlation matrix, n > 1 assumed  */

start invcorr(a);

    %IF &corr = 4 %THEN %DO;  /* exchangeable */
        ainv = xchinv(ncol(a), a[1,2]);
    %END;
    %ELSE %DO;
        ainv = findinv(a);
    %END;

    return(ainv);
    
finish; /* invcorr */

*****************************************************************;

/* Calculates the variance-covariance matrix and the 
   influence statistics */

start varinfl(y, x, labelx, den, offset, nobs, p, ni, K, T, time, R, 
    phi, oldb);

    /* Fix scale for variance calcs for binary dist when Den = 1 */
    fixscale = 0;
    %IF &VARI = 3 %THEN %DO;
        ncfix = ncol(unique(y));
        if ncfix = 2&(max(y) = 1)&(min(y) = 0)&(den = 1) then do;
            fixscale = 1;
            phi = 1;
        end;
    %END;

    lp = x*oldb;
    ui = linkid(lp, den, offset);
    di = deri(ui, den, nobs);
    ei = y-ui;
    vu = varfun(ui, den, nobs);
    ad = canon(vu, di);
    sei = ei/sqrt(vu);  /* standardized residuals: input to estr */
    pearchi = SSQ(sei);    /* pseudo pearson chi-square */

    devi = devi(ui, den, ei, y);      /* pseudo deviance  */


    /* estimate XWX and M1 for model-based and empirical variance estimators */

    nx = 0;
    xwx = J(p, p, 0);
    m1 = J(p, p, 0);

    do hh = 1 to k;
        nj = ni[hh];

        /*  determine rj */
        if nj = 1 then rj = 1;
        %IF (%length(&TIME) = 0) %THEN %DO;
            else if (time = 0) then rj = invcorr(R[1:nj, 1:nj]);
        %END;
        %ELSE %DO;
            else do;
                timej = time[nx+1:nx+nj];
                rj = invcorr(R[timej, timej]);
            end;
        %END;
        
        xj = x[nx+1:nx+nj, ];
        adj = ad[nx+1:nx+nj];
        adxj = xj#adj;
        XWX = XWX+adxj`*rj*adxj;
        seij = sei[nx+1:nx+nj];
        mj = adxj`*rj*seij;
        m1 = m1+mj*mj`;
        nx = nx + nj;
    end; /* end do loop over clusters */

    inxwx = FINDINV(XWX);


    /* estimate M1B for bias-corrected variance estimator */

    m1b = J(p, p, 0);
    nx = 0;

    do hh = 1 to k;
        nj = ni[hh];
        xj = x[nx+1:nx+nj, ];
        adj = ad[nx+1:nx+nj];
        eij = ei[nx+1:nx+nj];
        dij = di[nx+1:nx+nj];
        DE = eij/dij;

        %IF (%length(&TIME) = 0) %THEN %DO;
            timej = 1:nj;
        %END;
        %ELSE %DO;
            timej = time[nx+1:nx+nj];
        %END;
        
        if nj = 1 then do;
            rj = 1;
            wj = adj*adj;
        end; 
        else do;
            rj = invcorr(R[timej, timej]);
            wj = diag(adj)*rj*diag(adj);
        end;
        
        adxj = xj#adj;
        qj = xj*inxwx*xj`;
        qw = qj*wj;
        projinv = ginv(I(nj)-qw);
        mjb = adxj`*rj*diag(adj)*projinv*DE;
        m1b = m1b+mjb*mjb`;
        nx = nx + nj;
    end; /* end do loop over clusters */


    /* calculate all variance estimates */

    nvar = inxwx#phi;
    rvar = inxwx*m1*inxwx;
    bcvar = inxwx*m1b*inxwx;

    nvardiag = vecdiag(nvar);
    rvardiag = vecdiag(rvar);
    bcvardiag = vecdiag(bcvar);

    nvardiag = choose(nvardiag<0, 0, nvardiag);
    rvardiag = choose(rvardiag<0, 0, rvardiag);
    bcvardiag = choose(bcvardiag<0, 0, bcvardiag);
    
    ns = sqrt(nvardiag);
    rs = sqrt(rvardiag);
    bcs = sqrt(bcvardiag);

    nbeta = oldb/ns;
    rbeta = oldb/rs;
    bcbeta = oldb/bcs;

    /* calculate influence diagnostics */
    
    dfbetcls = J(k, p, 0);   /* dfbeta for a cluster */
    dfbetscls = J(k, p, 0);   /* dfbetas for a cluster */
    cookdcls = J(k, 1, 0);   /* cookd for a cluster */
    gcls = J(k, 1, 0);       /* lack of fit for a cluster */
    trqwcls = J(k, 1, 0);    /* trace of qw for a cluster */

    %IF %length(&OBSOUT)^= 0 %then %DO;
        i = J(nobs, 1, 0);       /* cluster indicator starting with 1 */
        ij = J(nobs, 1, 0);      /* observation indicator starting with 1 */
        nij = J(nobs, 1, 0);     /* cluster size for observation dataset */
        dfbetobs = J(nobs, p, 0);  /* dfbeta for an observation */
        dfbetsobs = J(nobs, p, 0); /* dfbetas for an observation */
        cookdobs = J(nobs, 1, 0);  /* cookd for an observation */
        qwobs = J(nobs, 1, 0);     /* diagonal element of qw for an observation */
    %END;
    
    nx = 0;
    ijx = 0;

    do hh = 1 to k;
        nj = ni[hh];
        xj = x[nx+1:nx+nj, ];
        adj = ad[nx+1:nx+nj];
        
        %IF (%length(&TIME) = 0) %THEN %DO;
            timej = 1:nj;
        %END;
        %ELSE %DO;
            timej = time[nx+1:nx+nj];
        %END;
        
        if nj = 1 then do;
            rj = 1;
            wj = adj*adj;
            vj = 1/wj;
        end; 
        else do;
            rj = invcorr(R[timej, timej]);
            wj = diag(adj)*rj*diag(adj);
            vj = (diag(1/adj))*R[timej, timej]*(diag(1/adj));
        end;
        
        qj = xj*inxwx*xj`;
        qw = qj*wj;
        trqwcls[hh] = trace(qw);
        eicls = ei[nx+1:nx+nj];
        DE = eicls/di[nx+1:nx+nj];
        projinv = ginv(I(nj)-qw);
        invarei = wj*projinv;
        hd = xj`*invarei*DE;
        difbetj = inxwx*hd;
        Dfbetcls[hh, ] = difbetj`;
        gi = eicls`*invarei*eicls/phi;
        gcls[hh] = gi;

        %IF &NORM=1 %THEN %DO;
            cookdcls[hh] = (hd`*difbetj)/(p#phi);
        %END;
        %ELSE %IF &NORM=2 %THEN %DO;
            if nrow(m1)=1 then m1inv=1/m1;
            else m1inv=findinv(m1);
            cookdcls[hh] = (hd`*m1inv*hd)/p;
        %END;
        %ELSE %IF &NORM=3 %THEN %DO; 
            if nrow(m1b)=1 then m1binv=1/m1b;
            else m1binv=findinv(m1b);
            cookdcls[hh] = (hd`*m1binv*hd)/p;
        %END;

        %IF %length(&OBSOUT)^= 0 %then %DO;
            if nj> 1 then do;
                do ii = 1 to nj;
                    ij[nx+ii] = ijx+1;
                    nij[nx+ii] = nj;
                    i[nx+ii] = hh;
                    comp = loc(timej^= ii);
                    vii = vj[comp, ii];
                    wii = wj[comp, ii];
                    Wti = wj[comp, comp]-(wii*wii`)/wj[ii, ii];
                    WV = wti*vii;
                    xtilt = xj[ii, ]`-xj[comp, ]`*WV;
                    qtil = xtilt`*inxwx*xtilt;
                    stil = vj[ii, ii]-vii`*WV;
                    DEtil = DE[ii]-DE[comp]`*WV;
                    hdobs = xtilt*DEtil/(stil-qtil);
                    difbetii = inxwx*hdobs;
                    Dfbetobs[nx+ii, ] = Difbetii`;
                    
                    %IF &NORM=1 %THEN %DO;
                        cookdobs[nx+ii] = (hdobs`*difbetii)/(p#phi);
                    %END;
                    %ELSE %IF &NORM=2 %THEN %DO;
                        cookdobs[nx+ii] = (hdobs`*m1inv*hdobs)/p;
                    %END;
                    %ELSE %IF &NORM=3 %THEN %DO; 
                        cookdobs[nx+ii] = (hdobs`*m1binv*hdobs)/p;
                    %END;
                    
                    qwobs[nx+ii] = qw[ii, ii];
                    ijx = ijx+1;
                end;
            end;
            else do;
                ij[nx+1] = ijx+1;
                nij[nx+1] = nj;
                i[nx+1] = hh;
                qwobs[nx+1] = trqwcls[hh];
                dfbetobs[nx+1, ] = difbetj`;
                cookdobs[nx+1] = cookdcls[hh];
                ijx = ijx+1;
            end;
        %END;
        
        nx = nx+nj;
    end;



    /* Standardize the DFBETAs */

    %IF &NORM=1 %THEN %DO;
        nsi = 1/ns;
        dfbetscls = dfbetcls # nsi`;

        %IF %length(&OBSOUT)^= 0 %then %DO;
            dfbetsobs = dfbetobs # nsi`;
        %END;
    %END;
    %ELSE %IF &NORM=2 %THEN %DO;
        rsi = 1/rs; 
        dfbetscls = dfbetcls # rsi`;

        %IF %length(&OBSOUT)^= 0 %then %DO;
            dfbetsobs = dfbetobs # rsi`;
        %END;
    %END;
    %ELSE %IF &NORM=3 %THEN %DO;
        bcsi = 1/bcs; 
        dfbetscls = dfbetcls # bcsi`;

        %IF %length(&OBSOUT)^= 0 %then %DO;
            dfbetsobs = dfbetobs # bcsi`;
        %END;
    %END;


    /* Output parameter estimates, SEs, z-scores, working correlation matrix, etc. */

    C1 = { 'Estimate' };
    C2 = { 'SE-Model' };
    C3 = { 'z-Model' };
    C4 = { 'SE-Empir'};
    C5 = { 'z-Empir'};
    print/ 
        'Estimate, s.e. and z-score:',,
        labelx oldb [colname = C1 Format = 9.4]
        ns   [colname = C2 Format = 9.4]
        nbeta[colname = C3 Format = 9.3]
        rs   [colname = C4 Format = 9.4]
        rbeta [colname = C5 Format = 9.3],;


    C1 = { 'Estimate' };
    C2 = { 'SE-BiasCorr' };
    C3 = { 'z-BiasCorr' };
    print 
        'SE based on bias-corrected covariance matrix (Mancl & DeRouen):',,
        labelx oldb [colname = C1 Format = 9.4]
        bcs  [colname = C2 Format = 12.4]
        bcbeta [colname = C3 Format = 12.3],;

    scale = sqrt(phi);

    if fixscale = 1 then
        scaleinfo = 'Binary response: Scale parameter is fixed at 1';
    else
        scaleinfo = 'Scale parameter computed as the square root of the normalized pseudo chi-square';

    print   
        '  Scale parameter: ' scale,
        '  Pseudo deviance: ' devi,
        'Pseudo chi-square: ' pearchi,,
        scaleinfo;

    %if &CORR = 1 %then %do;
        print 'Working correlation matrix: Identity';
    %end;
    %else %if &CORR = 4 %then %do;
        outalpha = r[1,2];
        print 'Working exchangeable correlation: ' outalpha;
    %end;
    %else %do;
        print 'Working correlation matrix: ', R;
    %end;


    /* Output requested datasets (order of below)
         Model-based covariance matrix
          Empirical covariance matrix
          Bias-corrected covariance matrix
          Observation-level diagnostics
          Cluster-level diagnostics        */


    %IF (%length(&NCOVOUT) ^= 0) %THEN %DO;
        nbetvar = oldb||ns||nvar;
        variable = { &XVAR };
        C1 = {   'estimate' 'model_se'  &XVAR };
        create &NCOVOUT from nbetvar [rowname = variable colname = C1];
        setout &NCOVOUT;
        append from nbetvar [rowname = variable];
    %END;


    %IF (%length(&RCOVOUT) ^= 0) %THEN %DO;
        rbetvar = oldb||rs||rvar;
        variable = { &XVAR };
        C1 = {   'estimate' 'empir_se'  &XVAR };
        create &RCOVOUT from rbetvar [rowname = variable colname = C1];
        setout &RCOVOUT;
        append from rbetvar [rowname = variable];
    %END;


    %IF (%length(&BCOVOUT) ^= 0) %THEN %DO;
        bcbetvar = oldb||bcs||bcvar;
        variable = { &XVAR };
        C1 = {   'estimate' 'biascorr_se'  &XVAR };
        create &BCOVOUT from bcbetvar [rowname = variable colname = C1];
        setout &BCOVOUT;
        append from bcbetvar [rowname = variable];
    %END;

    
    use &DATA; 
    read all var {&ID} into &id; 
    idtype = type(&id);
    if idtype = "N" then
        charid = char(&id);
    else
        charid = &id;
    %IF %length(&TIME) = 0 %THEN %DO;  
        obsid = charid;
    %END;
    %ELSE %DO; 
        join = j(nobs, 1, "_"); 
        time = char(time); 
        obsid = concat(charid, join, time);
        obsid = rowcatc(obsid);
    %END;

    XVAROUT  = "DFBETA_"  + {&XVAR};
    XVARSOUT = "DFBETAS_" + {&XVAR};

    %IF (%length(&OBSOUT) ^= 0) %THEN %DO;
        C1 = { I IJ NI FIT RES SRES QWOBS COOKDOBS } || XVAROUT || XVARSOUT ;
        xvar  = dfbetobs;
        xvars = dfbetsobs;
        obout = i||ij||nij||ui||ei||sei||qwobs||cookdobs||xvar||xvars;
        create &OBSOUT from obout [rowname = obsid colname = c1] ;
        setout &OBSOUT;
        append from obout [rowname = obsid];
    %END;
    
    %IF (%length(&CLSOUT) ^= 0) %THEN %DO;
        &id = (unique(&id))`;
        I = (1:k)`;
        xvar  = dfbetcls;
        xvars = dfbetscls;

        if idtype = "N" then do;
            C1 = { &id I NI TRQWCLS COOKDCLS GCLS } || XVAROUT || XVARSOUT ;
            clout = &id||i||ni||trqwcls||cookdcls||gcls||xvar||xvars;
            create &CLSOUT from clout [colname = c1]; 
            setout &CLSOUT;
            append from clout; 
        end;
        else do;
            C1 = { I NI TRQWCLS COOKDCLS GCLS } || XVAROUT || XVARSOUT ;
            clout = i||ni||trqwcls||cookdcls||gcls||xvar||xvars;
            create &CLSOUT from clout [rowname = &id colname = c1] ;
            setout &CLSOUT;
            append from clout [rowname = &id];
        end;
    %END;

finish; /* varinfl */

*****************************************************************;

start credit;

    print  ,,
        "(c) J. S. Preisser, B.G. Hammill 2003-2005"   ,
        "Department of Biostatistics "  ,
        "University of North Carolina " ,
        "Chapel Hill, N.C.";
        
finish; /* credit */

*****************************************************************;


*****************************************************************;

start main; /** Main iteration **/

    run init;

    if (^ok) then do;
        print "Macro halted -- See ERROR message(s) above";
        return;
    end;


    /* Create matrices for analysis */

    use &DATA; 
    
    m = {&M};
    labelx = {&XVAR}`;
    p = nrow(labelx);
    read all var {&YVAR} into yvar;
    read all var labelx into xvar;
    read all var {&ID} into id;

    %IF (%length(&OFFS)^= 0) %THEN %DO;
        read all var {&OFFS} into offset;
    %END;
    %ELSE %DO; 
        offset = 0; 
    %END;

    %IF (&VARI = 3 & (%length(&N)^= 0)) %THEN %DO;
        read all var {&N} into den;
    %END;
    %ELSE %DO; 
        den = 1; 
    %END;


    run firstb(beta, F, ad, yvar, xvar, den, p, labelx, nobs, offset);
    pearchi = -5;
    crit = 99;
    
    oldr = r0;
    
    DO iter = 1 to &ITER WHILE(crit>&CRIT);
        prevpear = pearchi;

        free R phi pearchi devi lp ui vu;

        run estrphi(R, phi, pearchi, devi, lp, ui, vu, beta, oldr, F, ad, 
            yvar, xvar, den, offset, nobs, p, ni, K, T, time, minn, maxn,
            m);

        %IF %upcase((&monitor) = yes) %THEN %DO;
            R1 = (' Iteration:');
            print iter [ROWNAME = R1 FORMAT = 3.0];
        %END;
    
        oldbeta = beta;

        free beta F w;
    
        %if (&binrange = Y & &vari = 3 & &corr ^= 1) %then %do;
            run checkcorrel(R, ui, T, k, minn, maxn, ni, time, m);
        %end;
        
        %IF &corr = 1 %THEN %DO;
            run glim(beta, F, ad, yvar, xvar, den, offset, lp, ui, vu, nobs);
        %END;
        %ELSE %DO;
            run estb(beta, F, ad, yvar, xvar, den,  time, R, ni, K, nobs, p, 
                lp, ui, vu);
        %END;
        
        %IF %upcase((&monitor) = yes) %THEN %DO;
            print / 'Estimate:',, labelx beta  [Format = 11.3],;
        %END;

        %IF %upcase(&crittype = ABS) %THEN %DO;
            I1 = ('Max absolute change in parameters:');
            crit = max(abs(oldbeta - beta));
        %END;
        %ELSE %DO;
            I1 = ('Max relative change in parameters:');
            crit = max(abs(1 - oldbeta/beta));
        %END;


        %IF %upcase((&monitor) = yes) %THEN %DO;
            print crit [ROWNAME = I1 FORMAT = 11.8]; 
        %END;

        oldr = R;
    end;
    
    iter = iter-1;

    IF (iter>= &ITER) THEN PRINT  /
        {'No Convergence after'} {&ITER} [FORMAT = 3.0] {'iterations.'};
    ELSE PRINT ' '/
        {'Convergence after'} iter [FORMAT = 3.0] {'iteration(s).'};


    %if (&binrange = Y & &vari = 3 & &corr ^= 1) %then %do;
        run checkcorrel(R, ui, T, k, minn, maxn, ni, time, m);
    %end;
    

    run varinfl(yvar, xvar, labelx, den, offset, nobs, p, ni, K, T, time, R,
        phi, beta);

    run credit;

finish; /* main */

*****************************************************************;

run main;

%mend GEE;