options nocenter nodate nonumber  linesize=100 pagesize=500;  title;

/* Moderated Regression For A 2-way Interaction Between Continuous Variables */

/****************  START OF TRIAL-RUN DATA  ************************************
   The following commands generate artificial data that can be used for
   a trial-run of the program.  Just run this whole file.  */
data trialrun;
do cases = 1 to 200;
idv   = normal(1);
moder = normal(1);
e = ranuni(1);
x = idv * moder;
dv  = 8.7*idv +  -1.49*moder +  1.23*(idv * moder)  + e*30;
output;end;
proc means data = trialrun; run;
/****************  END OF TRIAL-RUN DATA  ************************************/



proc iml;
reset noname;

/* Enter the name of the dataset on the following USE statement.
   i.e., Replace "trialrun" with the name of your dataset. */
use trialrun;

/* Enter the names of the variables to be analyzed from your data set
   inside the curly brackets on the following READ statement.
   The order of the variable names must be : IDV, Moderator, DV  */
read all var { idv moder dv } into data;

/* Specify the number of chunks for the IDV */
chunkIDV = 7 ;

/* Specify the number of chunks for the MOD */
chunkMOD = 7 ;

/* Specify the method of determining highest & lowest values for the IDV & MOD
/* for an optimal design
/* Enter 1 to let the program do it automatically
/* Enter 2 to use your own preferred values */
optdes = 1;

/* If you entered 2 on the above "optdes =" statement, then enter your preferred
/* values on the next four statements: */

/* Enter the lowest value for the IDV in an optimal design */
lowIDV = 1;

/* Enter the highest value for the IDV in an optimal design */
highIDV = 5;

/* Enter the lowest value for the MOD in an optimal design */
lowMOD = 1;

/* Enter the highest value for the MOD in an optimal design */
highMOD = 5;

/* Specify the # of randomized data sets for the randomization
/* test of statistical significance.  Recommendations: 
/* use 100 data sets for a trial run, but use 1000 or more for final results */
permutes = 100;

/* End of required user specifications */


idv   = data[,1];
moder = data[,2];
dv    = data[,3];
show datasets; show contents;

print,,,, 'Moderated Regression For A 2-way Interaction Between Continuous Variables:';

/* row sums module */
start rsum(matname);
rsums =j(nrow(matname),1);
do rows = 1 to nrow(matname);
dumr = matname[rows,];
rsums[rows,1]=sum(dumr);
end;
return(rsums);
finish;

/* collumn sums module */
start csum(matname);
csums =j(1,ncol(matname));
do cols = 1 to ncol(matname);
dumc = matname[,cols];
csums[1,cols]=sum(dumc);
end;
return(csums);
finish;

/* column means module */
start cmean(matname);
moyennes = matname[+,] / nrow(matname);
return(moyennes);
finish;

/* column standard deviations module -- IML manual p 112 */
start cstd(matname);
N = nrow(matname);
matname = matname - repeat(cmean(matname),N,1);
ss = matname[##,];
csd = sqrt(ss/(N-1));
return(csd);
finish;

/* correlation matrix module  -- IML manual p 112 */
start corr(matname);
n = nrow(matname);
sum = matname[+,];
xpx = t(matname) * matname - t(sum) * sum / n;
s = diag(1 / sqrt(vecdiag(xpx)));
corr = s * xpx * s;
return(corr);
finish;

/* sample size */
bigN = nrow(idv);

/* centering the IDV & MOD */
idv = idv - cmean(idv);
moder = moder - cmean(moder);

/* 2-D frequency distribution. */
IDVmax = max(idv) + .00000001;
IDVmin = min(idv) - .00000001;
MODmax = max(moder) + .00000001;
MODmin = min(moder) - .00000001;
increm1 = (IDVmax - IDVmin) / chunkIDV;
increm2 = (MODmax - MODmin) / chunkMOD;
freqs = j(chunkIDV, chunkMOD, 0);
do luper = 1 to bigN;
IDVchunk =  ceil( ( idv[luper,1] - IDVmin ) / increm1 );
MODchunk =  ceil( ( moder[luper,1] - MODmin ) / increm2 );
freqs[ IDVchunk, MODchunk ] = freqs[ IDVchunk, MODchunk ] + 1;
end;

/* Moderated Regression */
x = idv # moder;
cr = corr( idv || moder || x || dv );
beta = inv(cr[1:3,1:3]) * cr[1:3,4];
mn = cmean(  idv || moder || x || dv  );
sd = cstd (  idv || moder || x || dv  );
b = (sd[1,4] / sd[1,1:3])   #  t(beta);
a = mn[1,4] - ( sum( mn[1,1:3] # b ) );
r2all  = t(beta) * cr[1:3,4];
r2main =  t( inv(cr[1:2,1:2])*cr[1:2,4] )  *  cr[1:2,4];
r2chXn = r2all - r2main;
fsquare = (r2all - r2main) / (1 - r2all);
r2chXn = r2all - r2main;
F = (r2all-r2main) / ((1-r2all)/(bigN-3-1));
dferror = bigN - 3 - 1;
pF = 1 - probf(F,1,dferror);

/* Moderated Regression including quadratic terms */
idvq = idv # idv;
moderq = moder # moder;
crq = corr( idv || moder || idvq || moderq || x || dv );
betaq = inv(crq[1:5,1:5]) * crq[1:5,6];
mnq = cmean( idv || moder || idvq || moderq || x || dv );
sdq =  cstd( idv || moder || idvq || moderq || x || dv  );
bq = (sdq[1,6] / sdq[1,1:5])   #  t(betaq);
aq = mnq[1,6] - ( sum( mnq[1,1:5] # bq ) );
r2allq  = t(betaq) * crq[1:5,6];
r2mainq =  t( inv(crq[1:4,1:4])*crq[1:4,6] )  *  crq[1:4,6];
r2chXnq = r2allq - r2mainq;
fsquareq = (r2allq - r2mainq) / (1 - r2allq);
r2chXnq = r2allq - r2mainq;
Fq = (r2allq-r2mainq) / ((1-r2allq)/(bigN-5-1));
dferrorq = bigN - 5 - 1;
pFq = 1 - probf(Fq,1,dferrorq);

/* f-squared = the proportion of systematic variance accounted for by the effect  
               relative to unexplained variance in the criterion (A & W, 1991, p. 157) */
fsquare = (r2all - r2main) / (1 - r2all);
fsquareq = (r2allq - r2mainq) / (1 - r2allq);

/* mse  Darlington p 121 */
xx = ( j(bigN,1,1) || idv || moder || x );
sse = t(dv) * dv  -  t( a // t(b)) * t(xx) * dv;
mse = sse / ( bigN - 3 - 1);

/* Vxzxz = the residual variance of the product XZ after controlling for X & Z (p. 378);
/*         2 ways of computing Vxzxz
/*         method 1 = the MSE that results from regressing XZ on X & Z (p. 381)
/*         method 2 = using the full mod reg equation, = 
/*         MSE / (n * stand. error of the estimate for the product##2) (p. 381) */

/* method 2: the full mod reg equation approach (yields slightly diff results in unusual sits)
/* Cohen & Cohen, 2003, p. 632 */
Rijm1 = inv(cr[1:3,1:3]); 
SExn = ( sd[1,4] / sd[1,3] ) * sqrt( (1 - r2all) / ( bigN - 3 - 1) ) * sqrt( Rijm1[3,3] ); 
Vxzxz2 = mse / ( ( bigN - 3 - 1) * SExn##2 );

/* method 1: using the MSE that results from regressing XZ on X & Z approach */
cr2 = corr( idv || moder || x );
beta2 = inv(cr2[1:2,1:2]) * cr2[1:2,3];
sd2 = cstd( idv || moder || x );
mn2 = cmean( idv || moder || x );
b2 = (sd2[1,3] / sd2[1,1:2])   #  t(beta2);
a2 = mn2[1,3] - ( sum( mn2[1,1:2] # b2 ) );
xx2 = ( j(bigN,1,1) || idv || moder );
sse2 = t(x) * x  -  t( a2 // t(b2)) * t(xx2) * x;
/* mse  Darlington p 121 */
mse2 = sse2 / ( bigN - 2 -1);
Vxzxz1 = mse2;

/* PRE, & Relative efficiency / optimal design values */

if optdes = 1 then do;
/* Using the mean scores in the top and bottoms chunks, based on the specified chunk sizes */
idvtop = -9999;
idvbot = -9999;
modtop = -9999;
modbot = -9999;
do luper = 1 to bigN;
if  idv[luper,1]   > (IDVmax - increm1) then idvtop = ( idvtop // idv[luper,1] );
if  idv[luper,1]   < (IDVmin + increm1) then idvbot = ( idvbot // idv[luper,1] );
if  moder[luper,1] > (MODmax - increm2) then modtop = ( modtop // moder[luper,1] );
if  moder[luper,1] < (MODmin + increm2) then modbot = ( modbot // moder[luper,1] );
end;
highIDV  = cmean(idvtop[2:nrow(idvtop),1]);
lowIDV   = cmean(idvbot[2:nrow(idvbot),1]);
highMOD  = cmean(modtop[2:nrow(modtop),1]);
lowMOD   = cmean(modbot[2:nrow(modbot),1]);
end;

/* maximim value of Vxzxz (p. 381, 383) */
maxVxzxz = (( (highIDV - lowIDV) / 2 ) ##2 )  *  (( (highMOD - lowMOD) / 2 ) ##2 );

/* Relative Efficiency  */
releffic = Vxzxz1 / maxVxzxz;

/* PRE squared partial correlation = proportional reduction in error (p 377, 384) */
/*   = the model improvement due to adding the product term */
PRE = 1 / ( 1+ ( mse / (b[1,3]##2 * Vxzxz1) ) );

/* PRE for an optimal design p 384 */
PRE2 = 1 / ( 1+ ( mse / (b[1,3]##2 * maxVxzxz) ) );

print "Sample Size:                                       " bigN;
print "IDV Mean:                                          " (mn[1,1]) [format=9.3];
print "IDV Standard Deviation:                            " (sd[1,1]) [format=9.3];
print "IDV Lowest Score:                                  " IDVmin [format=9.3];
print "IDV Highest Score:                                 " IDVmax [format=9.3];
print "MOD Mean:                                          " (mn[1,2]) [format=9.3];
print "MOD Standard Deviation:                            " (sd[1,2]) [format=9.3];
print "MOD Lowest Score:                                  " MODmin [format=9.3];
print "MOD Highest Score:                                 " MODmax [format=9.3];
print "Specified # of chunks for the IDV:                 " chunkIDV;
print "Specified # of chunks for the MOD:                 " chunkMOD;
print "IDV Low  Value for an Optimal Design:              " lowIDV [format=9.3];
print "IDV High Value for an Optimal Design:              " highIDV [format=9.3];
print "MOD Low  Value for an Optimal Design:              " lowMOD [format=9.3];
print "MOD High Value for an Optimal Design:              " highMOD [format=9.3];
print "Joint Frequencies of the IDV & MOD:",, freqs;
print "Vxzxz1 (residual variance of the product term):    " Vxzxz1 [format=9.3];
print "Vxzxz2 (residual variance of the product term):    " Vxzxz2 [format=9.3];
print "maxVxzxz (maximum possible value of Vxzxz):        " maxVxzxz [format=9.3];
print "Relative Efficiency (Vxzxz / maxVxzxz):            " releffic [format=9.3];
print "PRE (Proportional Reduction in Error):             " PRE [format=9.3];
print "PRE for the optimal design:                        " PRE2 [format=9.3];

print,,,, "Regression results for the equation that DOES NOT include quadratic terms:";
print "Slope coefficient for the IDV:                     " (b[1,1]) [format=9.3];
print "Slope coefficient for the MOD:                     " (b[1,2]) [format=9.3];
print "Slope coefficient for the product term:            " (b[1,3]) [format=9.3];
print "Intercept:                                         " a [format=9.3];
print "R-squared change for the interaction term:         " r2chXn [format=9.3];
print "f-squared effect size for the interaction term:    " fsquare [format=9.3];
print "F value for the interaction term:                  " F [format=9.3];
print "Significance level of F (tabled value):            " pF [format=9.6];
print "Specified number of randomized data sets:          " permutes;

/* Analyses of the randomized data */
counter = 0;
counterq = 0;
do nperms = 1 to permutes;

/*  The raw data permutations are based on column-wise random shufflings
/*  of the values in the raw data matrix using Castellan's (1992, 
/*  BRMIC, 24, 72-77) algorithm; The distributions of the original 
/*  raw variables are exactly preserved in the shuffled versions used
/*  in the parallel analyses. */

/* data matrix to permute */
permdata = ( idv || moder || dv );
do luper = 1 to (bigN -1);
k1 = int( (bigN - luper + 1) * ranuni(1) + 1 )  + luper - 1;
k2 = int( (bigN - luper + 1) * ranuni(1) + 1 )  + luper - 1;
k3 = int( (bigN - luper + 1) * ranuni(1) + 1 )  + luper - 1;
d1 = permdata[luper,1];
d2 = permdata[luper,2];
d3 = permdata[luper,3];
permdata[luper,1] = permdata[k1,1];
permdata[luper,2] = permdata[k2,2];
permdata[luper,3] = permdata[k3,3];
permdata[k1,1] = d1;
permdata[k2,2] = d2;
permdata[k3,3] = d3;
end;
idv2 = permdata[,1];
mod2 = permdata[,2];
dv2  = permdata[,3];

/* Moderated Regression */
x2 = idv2 # mod2;
cr2 = corr( idv2 || mod2 || x2 || dv2 );
beta2 = inv(cr2[1:3,1:3]) * cr2[1:3,4];
r2all2  = t(beta2) * cr2[1:3,4];
r2main2 =  t( inv(cr2[1:2,1:2])*cr2[1:2,4]) * cr2[1:2,4];
F2 = (r2all2-r2main2) / ((1-r2all2)/(bigN-3-1));
if F2 >= F then  counter = counter + 1;

/* Moderated Regression including quadratic terms */
idvq2 = idv2 # idv2;
modq2 = mod2 # mod2;
crq2 = corr( idv2 || mod2 || idvq2 || modq2 || x2 || dv2 );
betaq2 = inv(crq2[1:5,1:5]) * crq2[1:5,6];
r2allq2  = t(betaq2) * crq2[1:5,6];
r2mainq2 =  t( inv(crq2[1:4,1:4])*crq2[1:4,6] )  *  crq2[1:4,6];
Fq2 = (r2allq2-r2mainq2) / ((1-r2allq2)/(bigN-5-1));
if Fq2 >= Fq then  counterq = counterq + 1;

end;

/* significance level computation from Noreen (1989, p. 56) */
siglevel  = (counter  + 1) / (permutes + 1);
siglevq = (counterq + 1) / (permutes + 1);

print "Significance level of F (randomization test):      " siglevel [format=9.6];

print,,,, "Regression results for the equation that INCLUDES quadratic terms:";
print "Slope coefficient for the IDV:                     " (bq[1,1]) [format=9.3];
print "Slope coefficient for the MOD:                     " (bq[1,2]) [format=9.3];
print "Slope coefficient for the IDV quadratic term:      " (bq[1,3]) [format=9.3];
print "Slope coefficient for the MOD quadratic term:      " (bq[1,4]) [format=9.3];
print "Slope coefficient for the product term:            " (bq[1,5]) [format=9.3];
print "Intercept:                                         " aq [format=9.3];
print "R-squared change for the interaction term:         " r2chXnq [format=9.3];
print "f-squared effect size for the interaction term:    " fsquareq [format=9.3];
print "F value for the interaction term:                  " Fq [format=9.3];
print "Significance level of F (tabled value):            " pFq [format=9.6];
print "Specified number of randomized data sets:          " permutes;
print "Significance level of F (randomization test):      " siglevq [format=9.6];

/* saving data for plots */
freqsout = (-9999 || -9999 || -9999);
do luper = 1 to chunkIDV;
do lupec = 1 to chunkMOD;
freqsout = ( freqsout // (luper || lupec || freqs[luper,lupec]) );
end;end;
freqsout = freqsout[2:nrow(freqsout),];

create plotdata from freqsout[colname={"IDV" "MOD" "Freqs"}]; append from freqsout;

quit;

/* The following commands generate graphs of the bivariate frequency
   distribution.  They require SAS/GRAPH.  */
goptions reset=global gunit=pct border cback=white
         colors=(blue green red) ctext=black
         ftitle=swissb ftext=swiss htitle=4 htext=3  hpos=120 vpos=52 ;

proc GCHART data = plotdata; block IDV / sumvar=Freqs group = MOD discrete; run;

proc g3d data=plotdata; scatter idv*mod=Freqs / caxis=black;run;

proc g3d data=plotdata; plot idv*mod=Freqs / caxis=black ;run;