* blex.sas
*  (adds: range restrictions on RHO due to marginal means)
* xref: - Liang, K-Y. and Zeger, S. L. (1986)
          "Longitudinal data analysis using generalized linear models",
          Biometrika, vol 73, pp 13-22.
        - Zeger, S.L. and Liang, K-Y. (1986)
          "Longitudinal data analysis for discrete and continuous outcomes",
          Biometrics, vol 42, 121-130.
        - B034.SAS  S060.SAS S063.SAS
* does: - Analysis of correlated (including longitudinal)
          binary data (0/1 only) using generalized estimating
          equations (GEE).
        - Exchangeable correlation only.
        - Naive and robust var. est.
        - Optimized for the case where there is enough memory to
          hold the whole data set.
        - Memory requirements guidelines:
          Let N = # observations,
              p = # covariates,
              n = maximum cluster size.
          Memory use is dominated by the following
             8 * (N*(p+6)  + 2*n*p)    bytes.

        e.g. N = 10,000, p=10, n=200   ->   about   1.25 Megabytes.

        - Compatible with Karim's macro calls for: link, vari, corr.
          If specified, must have link=3, vari=3, corr=4.

        - output data sets if requested.

        - verifies all restrictions on rho (due to means, n);

* By:   - Bahjat Qaqish and Habib Moalem
          Department of Biostatistics
          CB 7400
          Chapel Hill,  NC 27599-7400
          (919) 966-7271
          qaqish@biostat.sph.unc.edu
* Date: - Original version, February 1994
        - Several later updates
***************************************************;
%macro gee   (
  data=,                /* required: dataset name */
  yvar=,                /* required: response 0/1 */
  xvar=,                /* required: covariates */
  id=,                  /* required: cluster id */
  offset=,              /* offset */
  link=3,               /* logit link only */
  vari=3,               /* binary variance only */
  corr=4,               /* exchangeable correlation only */
  epsilon=1.0e-4,       /* criterion for beta convergence */
  epsilon2=1.0e-2,      /* criterion for rho  convergence */
  monitor=no,           /* monitor convergence */
  beta=,                /* initial values */
  rho=0,                /* initial value */
  fixrho=no,            /* fixrho: no/yes */
  fixbeta=no,           /* fixbeta: no/yes */
  outest=,              /* estimtates and cov matrices dataset */
  outs=,                /* fitted values and residuals dataset */
  conf=0.95,            /* confidence intervals */
  maxiter = 25   );     /* max. # iterations */
***************************************************;
* check required parameters before starting IML;
%let err=0;
%let fixrho=%upcase(&fixrho);
%let fixbeta=%upcase(&fixbeta);
%let monitor=%upcase(&monitor);

%if %length(&data)=0 %then %do;
   %put E R R O R : data=dataset must be specified.;
   %let err=1;
%end;
%if %length(&yvar)=0 %then %do;
   %put E R R O R : yvar=response must be specified.;
   %let err=1;
%end;
%if %length(&xvar)=0 %then %do;
   %put E R R O R : xvar=covariates must be specified.;
   %let err=1;
%end;
%if %length(&id)=0 %then %do;
   %put E R R O R : id=identification number must be specified.;
   %let err=1;
%end;

%if (&link ^= 3) %then %do;
   %put E R R O R : link=3 is the only value allowed.;
   %let err=1;
%end;

%if (&vari ^= 3) %then %do;
   %put E R R O R : vari=3 is the only value allowed.;
   %let err=1;
%end;

%if (&corr ^= 4) %then %do;
   %put E R R O R : corr=4 is the only value allowed.;
   %let err=1;
%end;

%if &err=1 %then %goto stopit;
***************************************************;
proc sort data=&data; by &id; run;      * sort data by id;
***************************************************;
proc summary data=&data nway;
  var &yvar;
  by &id;
  output out=_N_ (keep=n) n=n;
  run;
***************************************************;
proc iml;
***************************************************;
start signon;

print
"Generalized estimating equations for exchangeably-
correlated binary responses (version 2.3)",
"(c) Bahjat Qaqish and Habib Moalem (1994, 1995)",
"Department of Biostatistics  ",
"CB 7400                      ",
"Chapel Hill,  NC 27599-7400  ",,;

finish signon;
***************************************************;
start info (k, sum_n, min_n, max_n, n, p);

  sum_n = sum(n);
  max_n = max(n);
  min_n = min(n);
  k = nrow(n);

  print
  "Data set: &data",
  "Response: &yvar",
  "Covariates: &xvar",
  %if %length(&offset)^=0 %then %do;
    "Offset: &offset",
  %end;
  "Cluster ID: &id",
  "Number of observations:"  sum_n,
  "Number of covariates:"    p,
  "Number of clusters:"      k,
  "Cluster sizes: from " min_n " to " max_n,
  ;

finish info;
***************************************************;
start psdinv (xinv, uinv, p, x);
  * xinv <- g-inverse of a symmetric,+ve, semi-definite matrix (x);
  * uinv <- g-inverse of root x;
  * p  <- rank(x);

  u = half(x);
  * u [redundant cols] <- 0;
  d = vecdiag(u);
  redcol = loc(d = 0);
  if (ncol(redcol)>0) then do;
      u[,redcol]=0;
  end;
  p = ncol(u)-ncol(redcol);

  uinv = ginv(u);
  xinv =  uinv * t(uinv);

finish psdinv;
***************************************************;
start  get1c (axi, zi, rhomin, rhomax, x, beta, y, offs, cluster, j1, j2);
  * compute zi, axi, rhomin, rhomax for one cluster;
  xi  = x [j1[cluster] : j2[cluster], ];
  yi  = y [j1[cluster] : j2[cluster], ];
  %if %length(&offset)=0 %then %do;
    ofs = 0;            * no offset;
  %end;
  %else %do;
    ofs = offs  [j1[cluster] : j2[cluster], ];  * offset;
  %end;
  run gee2 (zi, axi, rhomin, rhomax, xi, beta, yi, ofs);  * compute z, ax;
finish;
***************************************************;
start     acc1c (u, h, g, x1s, x2s, g1, g2, adx, z, rho, irth);
  * accumulate contribution from one cluster;
  * irth : irth * irth` = inv(info matrix from last iter);
  * adds contribution to u, h, g, x1s, x2s, g1, g2;
  * adx, z, rho, irth: not modified;
  * note: the factor c=1/(1-rho) is ignored,
        the final nvar = inv(h) must be rescaled;
  * this is optimal code for corr=4, n by n matrices avoided;

  n = nrow(adx);
  t = sum(z);
  d = -rho/(1+rho*(n-1));
  a = 1 + n * d;
  adxt = t(adx);
  e = adx[+,];

  h = h + (adxt * adx) + t(e) * (d*e);
  ui = adxt * (z + d*t);
  free adxt;
  u = u + ui;
  g = g + ui * t(ui);

  l = adx * irth;
  g1i = ssq(l);
  g2i = ssq(l[+,]);
  g1 = g1 + g1i;
  g2 = g2 + g2i;

  x1si = ssq(z);
  x2si = t*t - x1si;
  x1s = x1s + x1si;
  x2s = x2s + x2si;

finish;
***************************************************;
start chk1 (rho, rholim, niter);

  c_l = 0.99;
  c_u = 0.99;

  if (rho <= rholim[1]) then do;
    newrho = c_l * rholim[1];
    print "Iteration " niter;
    print "rho = " rho  " is below the valid range and will be set to "
           newrho;
    rho = newrho;
  end;

  else if (rho >= rholim[2])    then do;
    newrho = c_u * rholim[2];
    print "Iteration " niter;
    print "rho = " rho  " is above the valid range and will be set to "
           newrho;
    rho = newrho;
  end;

finish;
***************************************************;
start adjust(nvar, rho);
  * scaling of nvar, because the factor 1/(1-rho) was
  ignored in the info matrix;
  nvar = (1-rho) * nvar;
finish adjust;
***************************************************;
%macro monitor2;
    %if (&monitor=YES) %then %do;
      print niter rho rholim g1 g2 x1s x2s crit crit2,
            beta u nvar;
    %end;
%mend;
***************************************************;
%macro monitor1;
    %if (&monitor=YES) %then %do;
      print p k min_n max_n sum_n n2;
    %end;
%mend;
***************************************************;
start update (rho, g1, g2, x2s, n2);
      rho = (x2s + (g2-g1)) / n2;
finish;
***************************************************************;
%macro update;
  %if (&fixrho=NO) %then %do;
      run update (rho, g1, g2, x2s, n2);
  %end;
%mend;
***************************************************;
start rhorange(rhomin, rhomax, fv);
*
  input: fv = mean vector (column or row)
  output: rhomin, rhomax = bounds on exchangeable correlation
          imposed by the mean vector and cluster size n;
;

  n = ncol(fv)*nrow(fv);

  if (n=0) then return;
  if (n=1) then do;
    rhomin = -1;
    rhomax =  1;
    return;
  end;

  fv1  = min(fv);
  fvn  = max(fv);
  if (n=2) then do;
    fv2  = fvn;
    fvn1 = fv1;
  end;
  else do;
   * find the second smallest;
   i=loc(fv=fv1);       * locate the min;
   fv[i[1]] = fvn;      * replace it by the max;
   fv2 = min(fv);       * the second smallest;
   fv[i[1]] = fv1;      * put the min back in place;
   * find the second largest;
   i=loc(fv=fvn);       * locate the max;
   fv[i[1]] = fv1;      * replace it by the min;
   fvn1= max(fv);       * the second largest;
   fv[i[1]] = fvn;      * put the max back in place;
  end;

  psi1 = sqrt(fv1/(1-fv1));
  psi2 = sqrt(fv2/(1-fv2));
  psin = sqrt(fvn/(1-fvn));
  psin1 = sqrt(fvn1/(1-fvn1));

  rhomin= -min(psi1*psi2, 1/(psin1*psin), 1/(n-1));
  rhomax= psi1/psin;

finish;
***************************************************;
start   gee4 (beta, oldrho, converge, crit, crit2, nvar, u, rho);

  %if (&fixbeta=NO) %then %do;
    dbeta = nvar * u;
  %end;
  %else %do;
    dbeta = j(nrow(u), 1, 0);
  %end;
  crit  = sum(abs(dbeta));
  crit2 =   abs(oldrho-rho);
  converge = ((crit < &epsilon) & (crit2 < &epsilon2));
  beta   = beta + dbeta;
  oldrho = rho;

finish;
***************************************************;
start   gee3 (rholim, niter, u, h, g, x1s, x2s, g1, g2, p);

    niter = niter + 1;
    u   = j(p, 1, 0);
    h   = j(p, p, 0);
    g   = h;
    x2s = 0;
    x1s = 0;
    g1  = 0;    * sum over i=1 to K of tr(Bi Qi Bi);
    g2  = 0;    * sum over i=1 to K of tr(Ji Bi Qi Bi);
    rholim[1] = -1;     * min;
    rholim[2] =  1;     * max;

finish;
***************************************************;
start gee2 (z, ax, rhomin, rhomax, x, beta, y, offs);
    * compute z, ax;

  %if %length(&offset)=0 %then %do;
    fv = 1/(1+exp(-(x*beta)));    * no offset;
  %end;
  %else %do;
    fv = 1/(1+exp(-(offs+x*beta)));     * offset;
  %end;
  a  = sqrt(fv#(1-fv));
  z  = (y - fv) / a;
  run rhorange(rhomin, rhomax, fv);
  free fv;
  ax = a # x;

finish;
***************************************************;
start gee1 (k, j1, j2, n2, n, sum_n);
  j2 = cusum(n);        * cluster ends;
  j1 = j2 - n + 1;      * cluster begins;
  k = nrow(n);
  sum_n = j2[k];
  n2 = ssq(n) - sum_n;
  free n;       * not needed any more;
finish;
***************************************************;
start gee (beta, rho, nvar, rvar, nep,
           converge, niter, y, offs, x, n, min_n, max_n  );

  * n = cluster sizes
    y = response
    x = design matrix
    offs = offset
    beta, rho: assumed to contain initial values
    n, y, x, offs: assumed sorted by cluster id
    n, y, x, offs: not modified
  ;

  p = ncol(x);
  run gee1 (k, j1, j2, n2, n, sum_n);  * compute  k, j1, j2, n2, free n;
  rholim = j(2,1,0);  * bounds on rho;
  rholim[1]=-1/(max_n -1); rholim[2]=1;
  run chk1 (rho, rholim, 0);
  %monitor1;

  niter = 0;
  irth  = 0;  * :  irth * irth` = inv(info matrix from last iter);
  oldrho = rho + abs(&epsilon);

  do until (converge | (niter=&maxiter));       * main loop;
    run gee3 (rholim, niter, u, h, g, x1s, x2s, g1, g2, p);
    do cluster = 1 to k;
      run get1c (axi, zi, rhomin, rhomax, x, beta, y, offs, cluster, j1, j2);
      rholim[1] = max(rholim[1], rhomin);
      rholim[2] = min(rholim[2], rhomax);
      run acc1c (u, h, g, x1s, x2s, g1, g2, axi, zi, rho, irth);
    end;
    g1 = g1 * (1-rho);  * the missing factor;
    g2 = g2 * (1-rho);  * the missing factor;
    %update;    * update rho;
    run chk1 (rho, rholim, niter);
    run psdinv (nvar, irth, nep, h);
    run gee4 (beta, oldrho, converge, crit, crit2, nvar, u, rho);
    %monitor2;
  end;  * main loop;

  rvar = nvar * g * nvar;
  run adjust(nvar, rho);

  if (^converge) then do;
    print "gee(): No convergence, niter = " niter;
  end;

  * show names memory;
finish gee;
***************************************************;
start initbeta (beta, p);

  * initial values of beta;
  * make sure beta has the correct dimension;
  * p == ncol(x);

  %if (%length(&beta)=0) %then %do;
    beta = j(p, 1, 0);
  %end;

  %else %do;
    beta = {&beta};
    a = ncol(beta) * nrow(beta);
    beta = shape(beta, a, 1);
    if (a > p) then do;
      print "The supplied beta vector is too long and will be truncated";
      beta = beta[1:p];
    end;
    else if (a < p) then do;
      print "The supplied beta vector is too short and will be padded";
      beta = beta  // j(p-a, 1, 0);
    end;
  %end;

finish initbeta;
***************************************************;
start getdata (x, y, offs, id1, n, beta, parms, rho, p);

  * init. all these objects;

  use _N_;
  read all var {n} into n;
  close _N_;

  * read data;
  use  &data;
  read all var {&xvar} into x;       * covariates;
  read all var {&yvar} into y;       * response;
  read all var {&id} into id1;       * id;
  %if %length(&offset)=0 %then %do;
    offs = 0;   * no offset;
  %end;
  %else %do;
    read all var {&offset} into offs;    * offset;
  %end;
  close &data;

  parms  = t({&xvar});               * variable names;

  rho = &rho;
  p = ncol(x);
  run initbeta(beta, p);
finish getdata;
***************************************************;
start results1 (s, ttl, parms, beta, v, za);

  b = beta;
  se = sqrt(vecdiag(v));       * standard errors;
  redcol = loc(se = 0);
  if (ncol(redcol)>0) then do;
      b [redcol]=.;
      se[redcol]=.;
  end;
  z = b/se;
  p = 2 * (1-probnorm(abs(z)));
  r = exp(b);           * odds ratios;
  lower = exp(b - za*se);
  upper = exp(b + za*se);
  variable = b || se || z || p || r || lower || upper;
  print  s,  variable  [r=parms c=ttl] ,,;

finish;
***************************************************;
%if %length(&outest)^=0 %then %do;
start dataout1 (rho, beta, nvar, rvar);
  * output estimates of parameters and covariance matrices;

    n = 1 + nrow(nvar) + nrow(rvar);
    a1 = {PARMS} //
         repeat({NCOV}, nrow(nvar),1) //
         repeat({RCOV}, nrow(nvar),1);
    a2 = {" "} //
         repeat(t({&xvar}) , 2, 1);
    a3 = repeat({&yvar}, n, 1);
    a  = a1 || a2 || a3;
    _TYPE_="12345678";
    _NAME_="12345678";
    _DEPVAR_="12345678";

    * char variables -> _ds1_;
    create _ds1_ var{ _TYPE_ _NAME_  _DEPVAR_};
    append from a;
    close _ds1_;

    x =  (t(beta) //     nvar //     rvar)  ||  repeat(rho, n, 1);
    g = ({&xvar})  || ({"_RHO_"});

    * numeric variables  -> _ds2_;
    create _ds2_ from x [colname=g];
    append from x;
    close _ds2_;

finish dataout1;
%end;
***************************************************;
%if %length(&outs)^=0 %then %do;
start dataout (id1, y, offs, x, beta);
  * output fitted values and residuals;

  %if %length(&offset)=0 %then %do;
    predict = 1/(1+exp(-(x*beta)));    * no offset;
  %end;
  %else %do;
    predict = 1/(1+exp(-(offs+x*beta)));     * offset;
  %end;
  residual  = y - predict;
  stdev     = sqrt(predict#(1-predict));
  stdresid  = residual / stdev;
  free stdev;

  * numeric variables;
  cols = {&id    &yvar "Predict"  "Residual"  "StdResid"   &xvar  };
  dm   =   id1 || y ||  predict || residual || stdresid ||  x ;

  create &outs  from dm [colname = cols];
  append from dm;
  close &outs  ;

finish dataout;
%end;
***************************************************;
start results (parms, beta, rvar, nvar, converge,
               niter, nep, rho, x, y, offs, id1);

if (converge) then
do;
  print "Converged, niter =  " niter;
  print "Number of estimable parameters = "  nep;

  %if (&fixrho=NO) %then %do;
    print "Estimated intra-cluster correlation = " rho;
  %end;
  %else %do;
    print "Intra-cluster correlation is fixed at " rho;
  %end;

  print "Summary of estimates and &conf confidence intervals:";
  z = probit(0.5+&conf*0.5);

  s = "Results using the robust variance estimator:";
  ttl = {'Estimate'  'Robust SE' 'z' 'p-val'
         'Odds Ratio' 'CI-lower' 'CI-upper'};
  run   results1 (s, ttl, parms, beta, rvar, z);

  s = "Results using the naive variance estimator:";
  ttl = {'Estimate'  'Naive  SE' 'z' 'p-val'
         'Odds Ratio' 'CI-lower' 'CI-upper'};
  run   results1 (s, ttl, parms, beta, nvar, z);

  %if %length(&outs)^=0 %then %do;
     run dataout (id1, y, offs, x, beta);
  %end;

  %if %length(&outest)^=0 %then %do;
     run dataout1 (rho, beta, nvar, rvar);
  %end;

end;
else if (^ converge) then
  do;
    print 'No convergence, niter ' niter;
    %if (&monitor ^= YES) %then %do;
      print "Consider running with monitor=yes" ;
    %end;
  end;
finish;
***************************************************************;
%macro options;
  %if (&monitor=YES) %then %do;
    reset name;
  %end;
  %else %do;
    reset noname;
  %end;
%mend options;
***************************************************************;

/* iml execution starts here -> */

%options;

run signon;

run getdata (x, y, offs, id1, n, beta, parms, rho, p);

run info (k, sum_n, min_n, max_n, n, p);

run   gee (beta, rho, nvar, rvar, nep,
           converge, niter, y, offs, x, n, min_n, max_n  );

run results (parms, beta, rvar, nvar, converge,
             niter, nep, rho, x, y, offs, id1);

***************************************************************;
/*
%if %length(&outs)^=0 %then %do;
   data &outs;
   set _ds4_;
   run;
%end;
*/
***************************************************************;
%if %length(&outest)^=0 %then %do;
   data &outest;
   merge _ds1_ _ds2_;
   run;
%end;
***************************************************************;
%stopit:
%mend gee;