/*********************************************************************/ /* Written by: Janet Tooze 6/19/00 */ /* Updated: 8/21/02 to add normal distribution */ /* 9/27/02 to increase variable length */ /* 11/12/02 to allow for no covariates */ /* 7/1/03 to allow starting values */ /* */ /* The MIXCORR macro is used to fit the Logistic-Lognormal-Normal */ /* Mixture or Logistic-Normal-Normal model with Correlated random */ /* effects using PROC NLMIXED. */ /* Currently, the macro uses the default settings in PROC NLMIXED, */ /* adaptive Gaussian quadrature and a dual quasi-Newton algorithm, */ /* to fit the model. Seven quadrature points are specified. */ /* Starting values for the two components of the model are obtained */ /* using PROC GENMOD to fit a logistic model without random effects */ /* and a lognormal model without random effects. Next, the */ /* Mixture model with Uncorrelated random effects */ /* is fit to the data, and the parameter estimates from this model */ /* are used as starting values for the model with correlated random */ /* effects. The macro gives the results for the models with and */ /* without correlated random effects. */ /* */ /* */ /* The syntax for the macro is: */ /* */ /* %mixcorr (data=,distn=,response=,covarsb=,covarsl=,subject=, */ /* start_val=); */ /* */ /* where "data" specifies the dataset to be used, */ /* "distn" specifies the distribution for intensity, */ /* lognormal or normal */ /* "response" specifies the Y variable, */ /* "covarsb" is a list of the covariates for the occurence */ /* portion of the model separated by spaces - */ /* Interactions must be in the order */ /* specified by PROC GENMOD */ /* "covarsl" is a list of the covariates for the intensity */ /* portion of the model separated by spaces - */ /* Interactions must be in the order */ /* specified by PROC GENMOD, and */ /* "subject" specifies the variable that indicates the */ /* unit with repeated measures. */ /* "start_val" specifies what starting values should be used */ /* (default is startm from genmod) */ /* */ /* Please note that the variable name "YN" is reserved for this macro*/ /* as well as: a1-z1, aa-zz, a2-z2, ab-az, s2, u1, u2, u1v, u2v, u12v*/ /* ll, ll1, ll2, llb, ll_ln, x1b1, x2b2, e_int, eint, eta1, eta_1, */ /* eta2, eta_2 /*********************************************************************/ %macro mixcorr(data,distn,response,covarsb,covarsl,subject,start_val); proc sort data=&data; by &subject; run; options nocenter nodate nonumber ls=72 ps=56; %if &start_val=%str() %then %let start_val=startm; %put startval: &start_val; /*---turn off printing---*/ ods exclude all; %let varsb=%upcase(%quote(&covarsb)); %let I=1; %do %until(%qscan(&varsb,&I,%str( ))=%str()); %let varb&I=%qscan(&varsb,&I,%str( )); %put &&varb&i; %let I=%eval(&I+1); %end; %let cntb=%eval(&I-1); %if &varsb=%str() %then %let cntb=0; %let varsl=%upcase(%quote(&covarsl)); %let I=1; %do %until(%qscan(&varsl,&I,%str( ))=%str()); %let varl&I=%qscan(&varsl,&I,%str( )); %put &&varl&i; %let I=%eval(&I+1); %end; %let cntl=%eval(&I-1); %if &varsl=%str() %then %let cntl=0; data data; set &data; if &response=0 then YN=0; else if &response>0 then YN=1; run; data data0; set &data; if &response>0 then logint=log(&response); if &response>0; run; /*---find starting estimates for logistic piece---*/ ods output ParameterEstimates=parmsg1(rename=Parameter=Name) modelfit=modelfitb; proc genmod data=data descending; model yn=&varsb/dist=binomial; run; data random1; Parameter='u1v'; Name='Var(Rndm Effect)'; Estimate=1; run; data newnames1; format var_name $70.; %let crd1=a1 b1 c1 d1 e1 f1 g1 h1 i1 j1 k1 l1 m1 n1 o1 p1 q1 r1 t1 v1 w1 x1 y1 z1 aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx yy zz; set parmsg1; if Name='Scale' then delete; %if &cntb>0 %then %do; %let j=1; %do %until(&j=(&cntb+1)); %let up=%upcase(&&varb&j); %let lngth=%length(&&varb&j); %put &up &lngth; %let int=%qscan(&crd1,1,%str( )); %let varname=%qscan(&crd1,(&j+1),%str( )); if &lngth<=20 then do; if Name='Intercept' then Parameter="&int"; else if UPCASE(Name)="&up" then do; Parameter="&varname"; var_name="&up"; end; end; if &lngth>20 then do; if Name='Intercept' then Parameter="&int"; else if UPCASE(Name)=substr("&up",1,20) then do; Parameter="&varname"; var_name="&up"; end; end; %let j=%eval(&j+1); %end; %end; %if &cntb=0 %then %do; %let int=%qscan(&crd1,1,%str( )); if Name='Intercept' then Parameter="&int"; %end; run; data start1; format Name $20. Parameter $4.; set newnames1 random1; if Parameter not in ('a1','u1v') then list=trim(Parameter)||'*'||trim(Var_Name); else if Parameter in ('a1','u1v') then list=trim(Parameter); keep Parameter Name Estimate list; run; data trans1; set start1; if Parameter in ('u1v') then delete; run; proc transpose data=trans1 out=out1; var list; run; data eqn1; format eta1 eta_1 $2000.; set out1; %if &cntb>0 %then %do; %let k=1; eta1=trim(col&k); %if (&k ne (&cntb+1)) %then %do %until(&k=(&cntb+1)); %let k=%eval(&k+1); eta1=trim(eta1)||'+'||trim(col&k); %end; eta_1=trim(eta1)||'+u1'; %end; %if &cntb=0 %then %do; eta_1=trim(col1)||' + u1'; %end; call symput('eta1',eta_1); run; %put &eta1; /*---turn the printing back on---*/ *ods select all; ods output ParameterEstimates=parmsf1 FitStatistics=fitf1 ConvergenceStatus=convf1; title1 'Binomial Model (Uncorrelated)'; proc nlmixed data=data; parms /data=start1; bounds u1v>=0; x1b1=&eta1; p=exp(x1b1)/(1+exp(x1b1)); model YN ~ binomial(1,p); random u1 ~ normal(0,u1v) subject=&subject; predict p out=predpu; predict u1 out=predu1u; run; /*---turn off printing---*/ *ods exclude all; data _null_; set convf1; call symput('convbi',Reason); run; %let dist="&distn"; %put &dist; %if (&dist="lognormal" or &dist="Lognormal" or &dist="LOGNORMAL") %then %do; /*---find starting estimates for lognormal piece---*/ ods output ParameterEstimates=parmsg2(rename=Parameter=Name) modelfit=modelfitin; proc genmod data=data0; model logint=&varsl/dist=normal; run; data random2; Parameter='u2v'; Name='Var(Rndm Effect)'; Estimate=1; run; data newnames2; format var_name $70.; %let crd2=a2 b2 c2 d2 e2 f2 g2 h2 i2 j2 k2 l2 m2 n2 o2 p2 q2 r2 t2 v2 w2 x2 z2 ab ac ad ae af ag ah aj ak al am an ao ap aq ar as at au av aw ax ay az; set parmsg2; %if &cntl>0 %then %do; %let j=1; %do %until(&j=(&cntl+1)); %let up=%qupcase(&&varl&j); %let lngth=%length(&&varl&j); %let int=%qscan(&crd2,1,%str( )); %let varname=%qscan(&crd2,(&j+1),%str( )); if &lngth<=20 then do; if Name='Intercept' then Parameter="&int"; else if UPCASE(Name)="&up" then do; Parameter="&varname";var_name="&up"; end; else if Name='Scale' then do; Parameter='s2'; Estimate=Estimate**2; Name='Residual'; end; end; if &lngth>20 then do; if Name='Intercept' then Parameter="&int"; else if UPCASE(Name)=substr("&up",1,20) then do; Parameter="&varname";var_name="&up"; end; else if Name='Scale' then do; Parameter='s2'; Estimate=Estimate**2; Name='Residual'; end; end; %let j=%eval(&j+1); %end; %end; %if &cntl=0 %then %do; %let int=%qscan(&crd2,1,%str( )); if Name='Intercept' then Parameter="&int"; else if Name='Scale' then do; Parameter='s2'; Estimate=Estimate**2; Name='Residual'; end; %end; run; data start2; format Name $20. Parameter $4.; set newnames2 random2; if Parameter not in ('a2','u2v') then list=trim(Parameter)||'*'||trim(Var_Name); else if Parameter in ('a2','u2v') then list=trim(Parameter); keep Parameter Name Estimate list; run; data trans2; set start2; if Parameter in ('u2v','s2') then delete; run; proc transpose data=trans2 out=out2; var list; run; data eqn2; format eta2 eta_2 $2000.; set out2; %if &cntl>0 %then %do; %let k=1; eta2=trim(col&k); %if (&k ne (&cntl+1)) %then %do %until(&k=(&cntl+1)); %let k=%eval(&k+1); eta2=trim(eta2)||'+'||trim(col&k); %end; eta_2=trim(eta2)||'+u2'; %end; %if &cntl=0 %then %do; eta_2=trim(col1)||' + u2'; %end; call symput('eta2',eta_2); run; %put &eta2; /*---turn the printing back on---*/ *ods select all; ods output ParameterEstimates=parmsf2 FitStatistics=fitf2 ConvergenceStatus=convf2; title1 'Lognormal Model (Uncorrelated)'; proc nlmixed data=data0; parms / data=start2; bounds u2v>=0; x2b2=&eta2; ll1=log(1/(sqrt(2*3.1416*s2)*&response)); ll2=(-(log(&response)-x2b2)**2)/(2*s2); ll=ll1+ll2; model &response ~ general(ll); random u2 ~ normal(0,u2v) subject=&subject; e_int=x2b2; predict u2 out=predu2u; predict e_int out=predeintu; run; data _null_; set convf2; call symput('convin',Reason); run; /*---turn off printing---*/ *ods exclude all; %end; %if (&dist="normal" or &dist="Normal" or &dist="NORMAL") %then %do; /*---find starting estimates for normal piece---*/ ods output ParameterEstimates=parmsg2(rename=Parameter=Name) modelfit=modelfitin; proc genmod data=data0; model &response=&varsl/dist=normal; run; data random2; Parameter='u2v'; Name='Var(Rndm Effect)'; Estimate=1; run; data newnames2; format var_name $70.; %let crd2=a2 b2 c2 d2 e2 f2 g2 h2 i2 j2 k2 l2 m2 n2 o2 p2 q2 r2 t2 v2 w2 x2 z2 ab ac ad ae af ag ah aj ak al am an ao ap aq ar as at au av aw ax ay az; set parmsg2; %if &cntl>0 %then %do; %let j=1; %do %until(&j=(&cntl+1)); %let up=%qupcase(&&varl&j); %let lngth=%length(&&varl&j); %let int=%qscan(&crd2,1,%str( )); %let varname=%qscan(&crd2,(&j+1),%str( )); if &lngth<=20 then do; if Name='Intercept' then Parameter="&int"; else if UPCASE(Name)="&up" then do; Parameter="&varname";var_name="&up"; end; else if Name='Scale' then do; Parameter='s2'; Estimate=Estimate**2; Name='Residual'; end; end; if &lngth>20 then do; if Name='Intercept' then Parameter="&int"; else if UPCASE(Name)=substr("&up",1,20) then do; Parameter="&varname";var_name="&up"; end; else if Name='Scale' then do; Parameter='s2'; Estimate=Estimate**2; Name='Residual'; end; end; %let j=%eval(&j+1); %end; %end; %if &cntl=0 %then %do; %let int=%qscan(&crd2,1,%str( )); if Name='Intercept' then Parameter="&int"; if Name='Intercept' then Parameter="&int"; else if Name='Scale' then do; Parameter='s2'; Estimate=Estimate**2; Name='Residual'; end; %end; run; data start2; format Name $20. Parameter $4.; set newnames2 random2; if Parameter not in ('a2','u2v') then list=trim(Parameter)||'*'||trim(Var_Name); else if Parameter in ('a2','u2v') then list=trim(Parameter); keep Parameter Name Estimate list; run; data trans2; set start2; if Parameter in ('u2v','s2') then delete; run; proc transpose data=trans2 out=out2; var list; run; data eqn2; format eta2 eta_2 $2000.; set out2; %if &cntl>0 %then %do; %let k=1; eta2=trim(col&k); %if (&k ne (&cntl+1)) %then %do %until(&k=(&cntl+1)); %let k=%eval(&k+1); eta2=trim(eta2)||'+'||trim(col&k); %end; eta_2=trim(eta2)||'+u2'; %end; %if &cntl=0 %then %do; eta_2=trim(col1)||' + u2'; %end; call symput('eta2',eta_2); run; %put &eta2; /*---turn the printing back on---*/ *ods select all; ods output ParameterEstimates=parmsf2 FitStatistics=fitf2 ConvergenceStatus=convf2; title1 'Normal Model (Uncorrelated)'; proc nlmixed data=data0; parms / data=start2; bounds u2v>=0; x2b2=&eta2; ll1=log(1/(sqrt(2*3.1416*s2))); ll2=(-(&response-x2b2)**2)/(2*s2); ll=ll1+ll2; model &response ~ general(ll); random u2 ~ normal(0,u2v) subject=&subject; e_int=x2b2; predict u2 out=predu2u; predict e_int out=predeintu; run; data _null_; set convf2; call symput('convin',Reason); run; /*---turn off printing---*/ *ods exclude all; %end; /*---compute starting value for covariance---*/ data cov; set parmsf1 parmsf2; if Parameter in ('u1v','u2v'); keep Parameter Estimate; run; proc transpose data=cov out=outcov; var Estimate; id Parameter; run; data cov2; set outcov; Parameter='u12v'; Rho=0.5; Name='Covariance'; Estimate=Rho*sqrt(u1v)*sqrt(u2v); keep Parameter Estimate Name; run; data fit1; format Parameter $4.; set fitf1; if Descr in ('-2 Log Likelihood','AIC (smaller is better)'); if Descr='-2 Log Likelihood' then do; Parameter='ll'; Name='-2 Log Likelihood--bi'; end; if Descr='AIC (smaller is better)' then do; Parameter='aic'; Name='AIC--bi'; end; proc sort; by Parameter; run; data fit2; format Parameter $4.; set fitf2; if Descr in ('-2 Log Likelihood','AIC (smaller is better)'); if Descr='-2 Log Likelihood' then do; Parameter='ll'; Name="-2 Log Likelihood--"||"&distn"; end; if Descr='AIC (smaller is better)' then do; Parameter='aic'; Name="AIC--"||"&distn"; end; proc sort; by Parameter; run; data fitboth; merge fit1 (rename=Value=Valueb) fit2 (rename=Value=Valuel); by Parameter; m2llo=Valueb + Valuel; run; data names; format Parameter $4. type $6. Name $40. ; set start1 (in=a) start2 (in=b) cov2 (in=c) fit1 (in=d) fit2 (in=e); if a=1 then type='1.bi'; else if b=1 then type='2.in'; else if c=1 then type='3.cov'; else if d=1 or e=1 then type='4.fit'; If type in ('1.bi','2.in') then Name2=trim(Name)||'--'||substr(type,3,2); else Name2=Name; keep Parameter Name type Name2 Value; proc sort; by Parameter; run; data names2; set names; if type='4.fit' then delete; run; data start3; format Parameter $4.; set parmsf1 parmsf2 cov2; proc sort; by Parameter; run; data startm; merge names2 start3; by Parameter; run; data estprint; merge names start3; by Parameter; run; proc sort; by type; run; /*---turn the printing back on---*/ *ods select all; /*---Fit Correlated Model---*/ title1 'Correlated Model'; ods output ParameterEstimates=parmsf3 FitStatistics=fitf3 ConvergenceStatus=convf3; proc nlmixed data=data; parms / data=&start_val; bounds u1v>=0; bounds u2v>=0; x1b1=&eta1; p=exp(x1b1)/(1+exp(x1b1)); llb=log((1-p)**(1-YN)) + log(p**(YN)); x2b2=&eta2; pi=arcos(-1); E_int=x2b2; %if (&dist="lognormal" or &dist="Lognormal" or &dist="LOGNORMAL") %then %do; if &response>0 then do; ll1=log(1/(sqrt(2*pi*s2)*&response)); ll2=(-(log(&response)-x2b2)**2)/(2*s2); ll_ln=ll1+ll2; end; if &response=0 then ll=llb; else if &response>0 then ll=llb+ll_ln; %end; %if (&dist="normal" or &dist="Normal" or &dist="NORMAL") %then %do; if &response>0 then do; ll1=log(1/(sqrt(2*pi*s2))); ll2=(-(&response-x2b2)**2)/(2*s2); ll_n=ll1+ll2; end; if &response=0 then ll=llb; else if &response>0 then ll=llb+ll_n; %end; model &response ~ general(ll); random u1 u2 ~ normal([0,0],[u1v,u12v,u2v]) subject=&subject out=rndm; predict p out=predp; predict u1 out=predu1; predict u2 out=predu2; predict e_int out=predeint; run; data _null_; set convf3; call symput('convcr',Reason); run; data fit3; format Parameter $4.; set fitf3; if Descr in ('-2 Log Likelihood','AIC (smaller is better)'); if Descr='-2 Log Likelihood' then do; Parameter='ll'; Name2='-2 Log Likelihood'; type='4.fit'; end; if Descr='AIC (smaller is better)' then do; Parameter='aic'; Name2='AIC'; type='4.fit'; end; run; data namesf; set startm fit3; keep Parameter Name2 type Value; proc sort; by Parameter; run; run; proc sort data=parmsf3; by Parameter; run; proc sort data=fitboth; by Parameter; run; data final; merge parmsf3 namesf fitboth; by Parameter; if Parameter='ll' then do; lrtest=m2llo-Value; probll=1-probchi(lrtest,1); end; proc sort; by type; run; data ll; set final; if parameter = 'll'; logl = value; keep logl; data aic; set final; if parameter = 'aic'; aic = value; keep aic; data numparms; merge ll aic; pcorr = (aic - logl)/2; pgenmod = pcorr - 3; porig = pcorr - 1; parameter = 'aic'; run; /*---turn the printing back on---*/ ods select all; /*---Report results for Uncorrelated Model---*/ data _null_; title1 "Results from Fitting Uncorrelated Model"; title2 "Response Variable: &response"; title3; title4 "Convergence Status:"; title5 " Binomial Model -- &convbi"; title6 " &distn Model -- &convin"; retain aaa 3 ab 5 bb 15 c 40 d 50 e 60; file print header=header ll=ll ls=126 ps=70; set estprint end=eof; by type; if type in ('1.bi','2.in') then put @ab Parameter @bb Name2 @c Estimate 8.4-r @d StandardError 8.4-r @e Probt 6.4-r; if ll=1 then do; put @aaa 70*'_'; end; if eof then do; put @aaa 70*'_'; end; return; header: put // @aaa 70*'_'; put @ab 'Parameter' @bb 'Name' @c 'Estimate' @d ' Std Err' @e 'Prob>|t|'; put @aaa 70*'_'; return; run; data estprint2; set estprint; by type Parameter; prevval=lag(Value); if last.Parameter then sum=Value+prevval; run; data _null_; title1 "Results from Fitting Uncorrelated Model"; title2 "Response Variable: &response"; retain aaa 3 ab 5 bb 40 cc 56; file print header=header ll=ll ls=126 ps=70; set estprint2 end=eof; by type; if type in ('4.fit') then put @ab Name2 @bb Value 8.2-r @cc sum 8.2-r; if ll=1 then do; put @aaa 70*'_'; end; if eof then do; put @aaa 70*'_'; end; return; header: put // @aaa 70*'_'; put @ab 'Name' @bb ' Value' @cc ' Sum'; put @aaa 70*'_'; return; run; title; /*---Report results for Correlated Model---*/ data _null_; title1 "Results from Fitting Correlated Model"; title2 "Response Variable: &response"; title3; title4 "Convergence Status:"; title5 " &convcr"; retain aaa 3 ab 5 bb 15 c 40 d 50 e 60; file print header=header ll=ll ls=126 ps=70; set final end=eof; by type; if type in ('1.bi','2.in','3.cov') then put @ab Parameter @bb Name2 @c Estimate 8.4-r @d StandardError 8.4-r @e Probt; if ll=1 then do; put @aaa 70*'_'; end; if eof then do; put @aaa 70*'_'; end; return; header: put // @aaa 70*'_'; put @ab 'Parameter' @bb 'Name' @c 'Estimate' @d ' Std Err' @e 'Prob>|t|'; put @aaa 70*'_'; return; run; data _null_; title1 "Results from Fitting Correlated Model"; title2 "Response Variable: &response"; retain aaa 3 ab 5 bb 25 cc 35 dd 50; file print header=header ll=ll ls=126 ps=70; set final end=eof; by type; if type in ('4.fit') then put @ab Name2 @bb Value 8.2-r @cc lrtest 8.2-r @dd probll 6.4-r; if ll=1 then do; put @aaa 70*'_'; end; if eof then do; put @aaa 70*'_'; end; return; header: put // @aaa 70*'_'; put @ab 'Name' @bb 'Value' @cc 'Diff in -2ll' @dd 'p-value'; put @aaa 70*'_'; return; run; data modelfit1; format Parameter $4.; merge modelfitb (rename=value=ll_b_gen) modelfitin (rename=value=ll_in_gen); if Criterion = 'Log Likelihood'; m2_ll_b_gen = -2*ll_b_gen; m2_ll_in_gen = -2*ll_in_gen; m2_ll_genmod = -2*(ll_b_gen + ll_in_gen); Name = '-2 Log Likelihood'; Parameter = 'll'; run; data modelfit2; format Parameter $4.; merge modelfit1 numparms; m2_ll_genmod = m2_ll_genmod + (2*pgenmod); Parameter = 'aic'; Name = 'AIC'; run; data modelfit; format Parameter $4.; set modelfit1 modelfit2; run; data _null_; title1 "Results from Genmod Model w/o Random Effects"; title2 "Response Variable: &response"; retain aaa 3 ab 5 bb 25 cc 40 dd 50; file print header=header ll=ll ls=126 ps=70; set modelfit end=eof; put @ab Name @bb m2_ll_b_gen 8.1-r @cc m2_ll_in_gen 8.1-r @dd m2_ll_genmod 8.1-r; if ll=1 then do; put @aaa 70*'_'; end; if eof then do; put @aaa 70*'_'; end; return; header: put // @aaa 70*'_'; put @ab 'Name' @bb 'Binomial' @cc "&distn" @dd 'Sum'; put @aaa 70*'_'; return; run; proc sort data=final; by Parameter; run; proc sort data=modelfit; by Parameter; run; proc sort data=numparms; by Parameter; run; data finalll; merge final(rename=Value=m2ll_corr) modelfit numparms; by Parameter; if Parameter in ('ll','aic'); run; %mend;