OPTIONS NOCENTER FORMCHAR = "|----|+|---+=";
TITLE1 'Data From: R. Mermelstein NCI smoking cessation project';
TITLE2 'Looking at imputation of smoking variable - OR = 2';
/* looking at last timepoint and collapsing controls & no-shows vs
two treatment groups */
/* performing simulation for the missing observations - OR=2 for MISSING & SMOKING */
/* putting in uncertainty attributable to BETAs */
/* USING PROC MIANALYZE for getting the MI results */
DATA one;
INFILE 'c:\smoke.dat';
INPUT id smk miss smk0 grp;
/* these variables are coded as follows:
grp 0=tx 1=control
smk (smoking at final endpoint - 24 month followup) 0=abstinent 1=smoking
smk0 (smoking at post-intervention) 0=abstinent 1=smoking
miss (missing at final endpoint) 0=observed 1=missing */
/* observed cell frequencies - these were obtained previous to this analysis */
n111 = 42; /* number of abstainers - smk0=abstinent */
n112 = 71; /* number of smokers - smk0=abstinent */
n211 = 36; /* number of abstainers - smk0=smoking */
n212 = 223; /* number of smokers - smk0=smoking */
/* using OR=2 for MISSING with SMOKING */
orat = 2;
beta0m = LOG(n112/n111);
beta1m = LOG(orat);
beta2m = LOG(n212/n211);
beta3m = LOG(orat);
seed = 974677743;
/* figure out cell frequencies for the missing cells */
/* these are based on the observed cell frequencies and the assumed odds ratio */
n12dot = 37; /* number of missing for smk0=abstinent */
p122 = (orat*(n112/n111))/(1 + (orat*(n112/n111)));
n122 = p122*n12dot;
n121 = (1 - p122)*n12dot;
n22dot = 80; /* number of missing for smk0=smoking */
p222 = (orat*(n212/n211))/(1 + (orat*(n212/n211)));
n222 = p222*n22dot;
n221 = (1 - p222)*n22dot;
/* variance associated with betas */
beta0v = (n111 + n112)/(n111*n112);
beta1v = 1/n111 + 1/n112 + 1/n121 + 1/n122;
beta2v = (n211 + n212)/(n211*n212);
beta3v = 1/n211 + 1/n212 + 1/n221 + 1/n222;
/* 100 simulated datasets */
DATA sim; SET one;
ARRAY smks(100) smks1-smks100;
DO i = 1 TO 100;
IF miss EQ 0 THEN smks(i) = smk;
IF miss EQ 1 THEN DO;
exp1 = RANEXP(seed); exp2 = RANEXP(seed); stdl = LOG(exp1/exp2);
ran0 = RANNOR(seed); ran1 = RANNOR(seed);
/* the next lines incorporate the covariance between beta0 and beta1
using the Cholesky factorization of the variance-covariance matrix
(and likewise for beta2 and beta3) */
beta0 = beta0m + ran0*SQRT(beta0v);
beta1 = beta1m - ran0*SQRT(beta0v) + ran1*SQRT(beta1v - beta0v);
ran2 = RANNOR(seed); ran3 = RANNOR(seed);
beta2 = beta2m + ran2*SQRT(beta2v);
beta3 = beta3m - ran2*SQRT(beta2v) + ran3*SQRT(beta3v - beta2v);
ystar = (1 - smk0)*(beta0 + beta1*miss) + smk0*(beta2 + beta3*miss) + stdl;
smks(i) = 0; IF ystar > 0 THEN smks(i) = 1;
END;
END;
/* set up the data in long format - with imputation number being a variable */
DATA unisim (KEEP = id grp smki _imputation_); SET sim;
ARRAY smks(100) smks1-smks100;
DO _imputation_ = 1 to 100;
smki = smks(_imputation_);
OUTPUT;
END;
PROC SORT; BY _imputation_ ;
RUN;
PROC LOGISTIC DESCENDING NOPRINT OUTEST=outlreg COVOUT;
MODEL smki = grp / LINK = LOGIT;
BY _imputation_ ;
RUN;
PROC MIANALYZE DATA=outlreg;
VAR intercept grp;
RUN;