/*
Analyst: Peter H. Van Ness
Date Created: August 23, 2010
Purpose: The purpose of this program is to calculate an intraclass correlation coefficient for a binary outcome.
*/
*Clustering Analysis;
*The intraclass correlation coefficient (ICC) is the ratio of the between-cluster variance to the total variance, i.e., the sum of the between-cluster and within-cluster variances. The code below calculates the ICC for binary outcome data, i.e., for proportions;
*Here is some sample simulated data;
*Each record holds a resident id, a nursing home (cluster) number and a 0/1 outcome;
data binary_icc;
   infile datalines;
   input id
         nursing_home
         binary_outcome;
datalines;
1 1 1
2 1 1
3 1 0
4 1 0
5 1 0
6 2 1
7 2 0
8 2 0
9 2 0
10 2 1
11 3 0
12 3 0
13 3 0
14 3 1
15 3 1
16 4 0
17 4 0
18 4 0
19 4 0
20 4 1
21 5 1
22 5 1
23 5 0
24 5 0
25 5 1
26 6 0
27 6 0
28 6 0
29 6 1
30 6 0
;
run;

*List the raw records as a visual check of the input;
proc print data=binary_icc;
run;
*This first step calculates the proportion of positive outcomes (binary_outcome=1) for each of the clusters
and stores it as a percentage in a variable called percent and as a proportion in a variable called proportion;
*The proportion is taken as the mean of the outcome within each nursing home, which assumes the outcome is coded 0/1.
A frequency table filtered to binary_outcome=1 has no row for a nursing home without any positive outcome, so such
a cluster would drop out of the between cluster variance instead of contributing a proportion of 0;
proc sort data=binary_icc out=sortbynh; by nursing_home; run;
proc print data=sortbynh; run;
*Displayed frequency table of the outcome within each nursing home;
proc freq data=sortbynh;
title2 'Percentages of Outcomes by Nursing Home';
by nursing_home;
tables binary_outcome;
run;
*One output row per nursing home, including nursing homes whose proportion is 0;
proc means data=sortbynh noprint;
by nursing_home;
var binary_outcome;
output out=nhmean (drop = _type_ _freq_) mean=proportion;
run;
*nhper reports the same quantity on the percentage scale;
data nhper; set nhmean; percent = 100*proportion; run;
proc print data=nhper; run;
*nhpro is the data set read by the between cluster variance step;
data nhpro; set nhper; run;
proc print data=nhpro; title2 'Proportions of Outcomes by Nursing Home'; run;
*This second step calculates the between cluster variance and creates a variable for it called btwvar;
*The between cluster variance yielded by the procedure below is .02266667;
*The Moments table stores each statistic twice: cValue2 is the formatted character text and nValue2 is the numeric value.
nValue2 is kept so that btwvar is a numeric variable and is not limited to the digits shown in the printed table;
ods output Moments = btwvar (keep = label2 nvalue2 where = (Label2 = 'Variance') rename = (nvalue2 = btwvar));
*ods trace writes the names of the output objects to the log, which is how the Moments table name can be looked up;
ods trace on;
proc univariate data=nhpro;
var proportion;
run;
ods trace off;
ods output close;
proc print data=btwvar; title2 'The Between Cluster Variance'; run;
*This third step calculates the total variance;
*The total variance, including the between and within cluster variance, is P*(1-P), or approximately (ASE^2)*N, where P is the overall proportion of positive outcomes;
*For the sample data: .36667*(1-.36667) = .23222, which is approximately (.0880^2)*30;
*Capture the one-way frequency row for binary_outcome=1 and rename its percent column to totper;
ods output OneWayFreqs = totper (
   keep   = percent binary_outcome
   rename = (percent = totper)
   where  = (binary_outcome=1)
);
proc freq data=binary_icc;
   title2 'The Binary Outcome Variable';
   tables binary_outcome / binomial (level=2);
run;
ods output close;

proc print data=totper;
run;

*Convert the total percentage to a total proportion called totpro and compute the total variance called totvar;
data totvar;
   set totper;
   totpro = totper/100;
   totvar = (1 - totpro)*totpro;
run;

proc print data=totvar;
   title2 'The Total Variance';
run;
*This fourth step calculates the ICC from the two values obtained above;
*For the sample data the ICC is the ratio of the between cluster variance to the total variance, .02266667/.23222 = .097608, or .098 rounded to three decimal places;
*The merge has no BY statement, so the rows of btwvar and totvar are paired by position;
data icc;
   merge btwvar totvar;
   icc = btwvar/totvar;
run;

proc print data=icc;
   var btwvar totvar icc;
   title2 'The ICC for a Binary Outcome';
run;