/*

#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
   
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
   
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.


Peter H. Van Ness
November 10, 2008
This program provides SAS code to calculate the probabilistic index from both a Wilcoxon Rank Sum
Statistic and from a Somers' D Correlation Coefficient.  See: Acion, Laura, Peterson, John J.,
Temple, Scott, and Arndt, Stephen, Probabilistic index: an intuitive, non-parametric approach
to measuring the size of treatment effects. Statistics in Medicine; 2006:591-602.

Note that the above authors speak of Kendall's Tau-b Correlation Coefficient.  In some
circumstances it yields the same value as Somers' D.  From the SAS output it is the
Somers' D value that should be generally used.
*/

*This presents the structure of the data.  Treatment leads to higher, more positive scores;

data p_index;
	/* One record per patient, read with list input from the in-stream lines below:
	   treatment is the group indicator (1 = treatment group, 0 = control group)
	   and score is the outcome being compared between the two groups. */
	infile datalines;
	input treatment score;
	datalines;
1 -5
1 0
1 1
1 0
1 1
1 0
1 3
1 5
0 -5
0 -4
0 -1
0 -1
0 -1
0 0
0 0
0 -1
0 4
;
run;

/* List the score column of the example data set. */
proc print data=p_index;
	var score;
run;


/*******************Probabilistic Index Calculation********************/

*Wilcoxon Rank Sum Test Version;

*This code calculates the probabilistic index P(treatment>control);

/* Wilcoxon rank-sum analysis of score, with treatment as the grouping variable.
   The rank sum and group sizes reported here feed the calculation that follows. */
proc npar1way wilcoxon data=p_index;
	var   score;
	class treatment;
run;

*From the PROC NPAR1WAY output, w is the rank sum total for the treatment group and n is the sample
size of the treatment group, while m is the sample size of the control group;

/* Inputs for the Wilcoxon version: w is the treatment rank sum, n the treatment
   group size and m the control group size.  They are entered by hand and must be
   updated whenever the data change. */
%let w=90.50;
%let n=8;
%let m=9;

/* Echo the inputs to the log so they can be checked against the procedure output. */
%put &w &n &m;

/* Probabilistic index = (w - n(n+1)/2) / (n*m): the rank sum less its smallest
   possible value, divided by the number of treatment-control pairs. */
data wilcoxon_calc;
	u_stat  = &w - (&n * (&n + 1)) / 2;
	p_index = u_stat / (&n * &m);
	keep p_index;	/* the output data set holds only the index itself */
run;

/* Show the Wilcoxon-based probabilistic index under a descriptive title. */
title  'Probabilistic Index from the Wilcoxon Rank Sum Statistic';
proc print data=wilcoxon_calc;
run;

*The value of the probabilistic index represents the probability that the score for a randomly chosen
patient in the treatment group is greater than the score for a randomly chosen patient from the control group;

/* The Somers' D Version */

/* NOTE: the comments in this section use the slash-star form on purpose.  A
   comment statement that starts with an asterisk and ends with a semicolon is
   tokenized by SAS, so the apostrophe in "Somers'" opens a quoted string that
   runs to the next apostrophe.  Written that way, the two comments around this
   step paired up and swallowed the whole PROC FREQ step, so it never ran. */

/* Cross-tabulate treatment by score and request the measures of association,
   which include the Somers' D coefficients. */
proc freq data=p_index;
	tables treatment*score / measures;
run;

/* From the PROC FREQ output somers_d is given the value of the Somers' D Correlation Coefficient
in the column given row (C|R) version. n is the sample size of the treatment group and
m is the sample size of the control group. */

/* Inputs for the Somers D version: somers_d is the coefficient value, n the
   treatment group size, m the control group size and s the combined size.
   They are entered by hand and must be updated whenever the data change. */
%let somers_d=0.5139;
%let n=8;
%let m=9;
%let s=(&n + &m);

/* Echo the inputs to the log so they can be checked against the procedure output. */
%put &somers_d &n &m &s;

data somers_calc;
	/* Helper quantities, dropped from the output data set. */
	cross_pairs = &n * &m;               /* treatment-control pairs           */
	all_pairs   = 0.5 * &s * (&s - 1);   /* pairs among all s observations    */

	/* v: pairs whose two members come from the same group. */
	v = 0.5 * (&n * (&n - 1)) + 0.5 * (&m * (&m - 1));

	/* t and u split the cross-group pairs according to the coefficient value.
	   NOTE(review): t presumably counts the pairs favouring control and u those
	   favouring treatment - confirm against the cited paper. */
	t = 0.5 * (cross_pairs - &somers_d * sqrt(cross_pairs * (all_pairs - v)));
	u = cross_pairs - t;

	/* Probabilistic index: u as a share of the cross-group pairs. */
	p_index = u / cross_pairs;

	drop cross_pairs all_pairs;
run;

/* Show the intermediate quantities and the resulting probabilistic index
   under a descriptive title. */
title  'Probabilistic Index from the Somers D Correlation Coefficient';
proc print data=somers_calc;
run;

*The value of the probabilistic index represents the probability that the score for a randomly chosen
patient in the treatment group is greater than the score for a randomly chosen patient from the control group;

/* Coda: logistic regression of treatment on score.
   The section heading used to be a bare "Coda:" token in open code.  SAS read it
   as the start of a statement, fused it with the PROC LOGISTIC statement, and
   rejected both, so the MODEL statement was left without a procedure.  As a
   comment, the step now runs and its c statistic can be compared with the
   probabilistic index. */
proc logistic data=p_index;
	model treatment = score;
run;

*Note the approximate equivalence of the probabilistic index and the c statistic as an estimate of the area under an ROC.
The interpretations of them, however, are different in the sense that the roles of the independent and dependent 
variables are reversed.;