options nocenter nodate nonumber  linesize=100 pagesize=500;  title;

/* Moderated Regression For A 3-way Interaction Between Continuous Variables */

/****************  START OF TRIAL-RUN DATA  ************************************
   The following commands generate artificial data that can be used for
   a trial-run of the program.  Just run this whole file.  */
data trialrun;
do cases = 1 to 200;
idv  = normal (1);
mod1 = normal (1);
mod2 = normal (1);
e = ranuni (1);
dv = 8.7*idv + -1.49*mod1 + 2.2*mod2 + 1.23*idv*mod1 + 3.1*idv*mod2
      + -1.3*idv*mod1*mod2 + e*30;
output;end;
proc means data = trialrun;run;
/****************  END OF TRIAL-RUN DATA  ************************************/



proc iml;
reset noname;

/* Enter the name of the dataset on the following USE statement.
   i.e., Replace "trialrun" with the name of your dataset. */
use trialrun;

/* Enter the names of the variables to be analyzed from your data set
   inside the curly brackets on the following READ statement.
   The order of the variable names must be : IDV, Mod1, Mod2, DV  */
read all var { idv mod1 mod2 dv } into data;

/* Specify the number of chunks for the IDV */
chunkIDV = 5 ;

/* Specify the number of chunks for MOD1 */
chunkMOD1 = 5 ;

/* Specify the number of chunks for MOD1 */
chunkMOD2 = 5 ;

/* Specify the method of determining highest & lowest values for the IDV & MOD
/* for an optimal design
/* Enter 1 to let the program do it automatically
/* Enter 2 to use your own preferred values */
optdes = 1;

/* If you entered 2 on the above "optdes =" statement, then enter your preferred
/* values on the next four statements: */

/* Enter the lowest value for the IDV in an optimal design */
lowIDV  = 1;

/* Enter the highest value for the IDV in an optimal design */
highIDV  = 5;

/* Enter the lowest value for MOD1 in an optimal design */
lowMOD1  = 1;

/* Enter the highest value for MOD1 in an optimal design */
highMOD1 = 5;

/* Enter the lowest value for MOD1 in an optimal design */
lowMOD2  = 1;

/* Enter the highest value for MOD1 in an optimal design */
highMOD2 = 5;

/* Specify the # of randomized data sets for the randomization
/* test of statistical significance.  Recommendations: 
/* use 100 data sets for a trial run, but use 1000 or more for final results */
permutes = 10;

/* End of required user specifications */


idv  = data[,1];
mod1 = data[,2];
mod2 = data[,3];
dv   = data[,4];
show datasets; show contents;

print, 'Moderated Regression For A 3-way Interaction Between Continuous Variables:';

/* row sums module */
start rsum(matname);
rsums =j(nrow(matname),1);
do rows = 1 to nrow(matname);
dumr = matname[rows,];
rsums[rows,1]=sum(dumr);
end;
return(rsums);
finish;

/* collumn sums module */
start csum(matname);
csums =j(1,ncol(matname));
do cols = 1 to ncol(matname);
dumc = matname[,cols];
csums[1,cols]=sum(dumc);
end;
return(csums);
finish;

/* column means module */
start cmean(matname);
moyennes = matname[+,] / nrow(matname);
return(moyennes);
finish;

/* column standard deviations module -- IML manual p 112 */
start cstd(matname);
N = nrow(matname);
matname = matname - repeat(cmean(matname),N,1);
ss = matname[##,];
csd = sqrt(ss/(N-1));
return(csd);
finish;

/* correlation matrix module  -- IML manual p 112 */
start corr(matname);
n = nrow(matname);
sum = matname[+,];
xpx = t(matname) * matname - t(sum) * sum / n;
s = diag(1 / sqrt(vecdiag(xpx)));
corr = s * xpx * s;
return(corr);
finish;

/* sample size */
bigN = nrow(idv);

/* centering the IDV & MOD1 & MOD2 */
idv  = idv  - cmean(idv);
mod1 = mod1 - cmean(mod1);
mod2 = mod2 - cmean(mod2);

/* 3-D min, max, increm */
IDVmax  = max(idv) + .00000001;
IDVmin  = min(idv) - .00000001;
MOD1max = max(mod1) + .00000001;
MOD1min = min(mod1) - .00000001;
MOD2max = max(mod2) + .00000001;
MOD2min = min(mod2) - .00000001;
increm1 = (IDVmax - IDVmin) / chunkIDV;
increm2 = (MOD1max - MOD1min) / chunkMOD1;
increm3 = (MOD2max - MOD2min) / chunkMOD2;

/* Moderated Regression */
x1 = idv  # mod1;
x2 = idv  # mod2;
x3 = mod1 # mod2;
x4 = idv # mod1 # mod2;
cr = corr( idv || mod1 || mod2 || x1 || x2 || x3 || x4 || dv );
beta = inv(cr[1:7,1:7]) * cr[1:7,8];
sd = cstd( idv || mod1 || mod2 || x1 || x2 || x3 || x4 || dv );
mn = cmean( idv || mod1 || mod2 || x1 || x2 || x3 || x4 || dv );
b = (sd[1,8] / sd[1,1:7])   #  t(beta);
a = mn[1,8] - ( sum( mn[1,1:7] # b ) );
r2all  = t(beta) * cr[1:7,8];
r2main =  t( inv(cr[1:6,1:6])*cr[1:6,8])  *  cr[1:6,8];
r2chXn = r2all - r2main;
F = (r2all-r2main) / ((1-r2all)/(bigN-7-1));
dferror = bigN - 7 - 1;
pF = 1 - probf(F,1,dferror);

/* Moderated Regression including quadratic terms */
idvq = idv # idv;
mod1q = mod1 # mod1;
mod2q = mod2 # mod2;
crq = corr( idv || mod1 || mod2 || idvq || mod1q|| mod2q ||
            x1 || x2 || x3 || x4 || dv );
betaq = inv(crq[1:10,1:10]) * crq[1:10,11];
mnq = cmean( idv || mod1 || mod2 || idvq || mod1q|| mod2q ||
            x1 || x2 || x3 || x4 || dv );
sdq =  cstd( idv || mod1 || mod2 || idvq || mod1q|| mod2q ||
            x1 || x2 || x3 || x4 || dv  );
bq = (sdq[1,11] / sdq[1,1:10])   #  t(betaq);
aq = mnq[1,11] - ( sum( mnq[1,1:10] # bq ) );
r2allq  = t(betaq) * crq[1:10,11];
r2mainq =  t( inv(crq[1:9,1:9])*crq[1:9,11] )  *  crq[1:9,11];
fsquareq = (r2allq - r2mainq) / (1 - r2allq);
r2chXnq = r2allq - r2mainq;
Fq = (r2allq-r2mainq) / ((1-r2allq)/(bigN-10-1));
dferrorq = bigN - 10 - 1;
pFq = 1 - probf(Fq,1,dferrorq);

/* f-squared = the proportion of systematic variance accounted for by the effect 
/*             relative to unexplained variance in the criterion (A & W, 1991, p. 157) */
fsquare = (r2all - r2main) / (1 - r2all);
fsquareq = (r2allq - r2mainq) / (1 - r2allq);

/* mse  Darlington p 121 */
xx = ( j(bigN,1,1) || idv || mod1 || mod2 || x1 || x2 || x3 || x4 );
sse = t(dv) * dv  -  t( a // t(b)) * t(xx) * dv;
mse = sse / ( bigN - 7 - 1);

/* Vxzxz = the residual variance of the product XZ after controlling for X & Z (p. 378);
/*         2 ways of computing Vxzxz
/*         method 1 = the MSE that results from regressing XZ on X & Z (p. 381)
/*         method 2 = using the full mod reg equation, = 
/*         MSE / (n * stand. error of the estimate for the product##2) (p. 381) */

/* method 2: the full mod reg equation approach (yields slightly diff results in unusual sits)
/* Cohen & Cohen, 2003, p. 632 */
Rijm1 = inv(cr[1:7,1:7]); 
SExn = ( sd[1,8] / sd[1,7] ) * sqrt( (1 - r2all) / ( bigN - 7 - 1) ) * sqrt( Rijm1[7,7] ); 
Vxzxz2 = mse / ( ( bigN - 7 - 1) * SExn##2 );

/* method 1: using the MSE that results from regressing XZ on X & Z approach */
cr2 = corr( idv || mod1 || mod2 || x1 || x2 || x3 || x4 );
beta2 = inv(cr2[1:6,1:6]) * cr2[1:6,7];
sd2 = cstd(  idv || mod1 || mod2 || x1 || x2 || x3 || x4 );
mn2 = cmean( idv || mod1 || mod2 || x1 || x2 || x3 || x4 );
b2 = (sd2[1,7] / sd2[1,1:6])   #  t(beta2);
a2 = mn2[1,7] - ( sum( mn2[1,1:6] # b2 ) );
xx2 = ( j(bigN,1,1) || idv || mod1 || mod2 || x1 || x2 || x3 );
sse2 = t(x4) * x4  -  t( a2 // t(b2)) * t(xx2) * x4;
/* mse  Darlington p 121 */
mse2 = sse2 / ( bigN - 6 -1);
Vxzxz1 = mse2;

/* PRE, & Relative efficiency / optimal design values */

if optdes = 1 then do;
/* Using the mean scores in the top and bottoms chunks, based on the specified chunk sizes */
idvtop  = -9999;
idvbot  = -9999;
mod1top = -9999;
mod1bot = -9999;
mod2top = -9999;
mod2bot = -9999;
do luper = 1 to nrow(idv);
if  idv[luper,1]  > (IDVmax  - increm1) then  idvtop  = ( idvtop  // idv[luper,1] );
if  idv[luper,1]  < (IDVmin  + increm1) then  idvbot  = ( idvbot  // idv[luper,1] );
if  mod1[luper,1] > (MOD1max - increm2) then  mod1top = ( mod1top // mod1[luper,1] );
if  mod1[luper,1] < (MOD1min + increm2) then  mod1bot = ( mod1bot // mod1[luper,1] );
if  mod2[luper,1] > (MOD2max - increm3) then  mod2top = ( mod2top // mod2[luper,1] );
if  mod2[luper,1] < (MOD2min + increm3) then  mod2bot = ( mod2bot // mod2[luper,1] );
end;
highIDV   = cmean(idvtop[2:nrow(idvtop),1]);
lowIDV    = cmean(idvbot[2:nrow(idvbot),1]);
highMOD1  = cmean(mod1top[2:nrow(mod1top),1]);
lowMOD1   = cmean(mod1bot[2:nrow(mod1bot),1]);
highMOD2  = cmean(mod2top[2:nrow(mod2top),1]);
lowMOD2   = cmean(mod2bot[2:nrow(mod2bot),1]);
end;

/* maximim value of Vxzxz (p. 381, 383) */
maxVxzxz = (( (highIDV - lowIDV) / 2 ) ##2 )  *  (( (highMOD1 - lowMOD1) / 2 ) ##2 )  *  (( (highMOD2 - lowMOD2) / 2 ) ##2 );

/* Relative Efficiency  */
releffic = Vxzxz1 / maxVxzxz;

/* PRE squared partial correlation = proportional reduction in error (p 377, 384) */
/*   = the model improvement due to adding the product term */
PRE = 1 / ( 1+ ( mse / (b[1,7]##2 * Vxzxz1) ) );

/* PRE for an optimal design p 384 */
PRE2 = 1 / ( 1+ ( mse / (b[1,7]##2 * maxVxzxz) ) );

print "Sample Size                                     " bigN;
print "IDV Mean:                                       " (mn[1,1]) [format=9.3];
print "IDV Standard Deviation:                         " (sd[1,1]) [format=9.3];
print "IDV Lowest Score:                               " IDVmin [format=9.3];
print "IDV Highest Score:                              " IDVmax [format=9.3];
print "MOD1 Mean:                                      " (mn[1,2]) [format=9.3];
print "MOD1 Standard Deviation:                        " (sd[1,2]) [format=9.3];
print "MOD1 Lowest Score:                              " MOD1min [format=9.3];
print "MOD1 Highest Score:                             " MOD1max [format=9.3];
print "MOD2 Mean:                                      " (mn[1,3]) [format=9.3];
print "MOD2 Standard Deviation:                        " (sd[1,3]) [format=9.3];
print "MOD2 Lowest Score:                              " MOD2min [format=9.3];
print "MOD2 Highest Score:                             " MOD2max [format=9.3];
print "Specified # of chunks for IDV:                  " chunkIDV;
print "Specified # of chunks for MOD1:                 " chunkMOD1;
print "Specified # of chunks for MOD2:                 " chunkMOD2;
print "IDV Low  Value for an Optimal Design:           " lowIDV [format=9.3];
print "IDV High Value for an Optimal Design:           " highIDV [format=9.3];
print "MOD1 Low  Value for an Optimal Design:          " lowMOD1 [format=9.3];
print "MOD1 High Value for an Optimal Design:          " highMOD1 [format=9.3];
print "MOD2 Low  Value for an Optimal Design:          " lowMOD2 [format=9.3];
print "MOD2 High Value for an Optimal Design:          " highMOD2 [format=9.3];
print "Vxzxz1 (residual variance of the product):      " Vxzxz1 [format=9.3];
print "Vxzxz2 (residual variance of the product):      " Vxzxz2 [format=9.3];
print "maxVxzxz (maximum possible value of Vxzxz):     " maxVxzxz [format=9.3];
print "Relative Efficiency (Vxzxz / maxVxzxz):         " releffic [format=9.3];
print "PRE (Proportional Reduction in Error):          " PRE [format=9.3];
print "PRE for the optimal design:                     " PRE2 [format=9.3];

print,,,, "Regression results for the equation that DOES NOT include quadratic terms:";
print "Slope coefficient for IDV:                      " (b[1,1]) [format=9.3];
print "Slope coefficient for MOD1:                     " (b[1,2]) [format=9.3];
print "Slope coefficient for MOD2:                     " (b[1,3]) [format=9.3];
print "Slope coefficient for IDV * MOD1:               " (b[1,4]) [format=9.3];
print "Slope coefficient for IDV * MOD2:               " (b[1,5]) [format=9.3];
print "Slope coefficient for MOD1 * MOD2:              " (b[1,6]) [format=9.3];
print "Slope coefficient for IDV * MOD1 * MOD2:        " (b[1,7]) [format=9.3];
print "Intercept:                                      " a [format=9.3];
print "R-squared change for the interaction term;      " r2chXn [format=9.3];
print "f-squared effect size for the interaction term: " fsquare [format=9.3];
print "F value for the interaction term:               " F [format=9.3];
print "Significance level of F (tabled value):         " pF [format=9.6];
print "Specified number of randomized data sets:       " permutes;

/* Analyses of the randomized data */
counter = 0;
counterq = 0;
do nperms = 1 to permutes;

/*  The raw data permutations are based on column-wise random shufflings
/*  of the values in the raw data matrix using Castellan's (1992, 
/*  BRMIC, 24, 72-77) algorithm; The distributions of the original 
/*  raw variables are exactly preserved in the shuffled versions used
/*  in the parallel analyses. */

/* data matrix to permute */
permdata = ( idv || mod1 || mod2 || dv );
do luper = 1 to (bigN -1);
k1 = int( (bigN - luper + 1) * ranuni(1) + 1 )  + luper - 1;
k2 = int( (bigN - luper + 1) * ranuni(1) + 1 )  + luper - 1;
k3 = int( (bigN - luper + 1) * ranuni(1) + 1 )  + luper - 1;
k4 = int( (bigN - luper + 1) * ranuni(1) + 1 )  + luper - 1;
d1 = permdata[luper,1];
d2 = permdata[luper,2];
d3 = permdata[luper,3];
d4 = permdata[luper,4];
permdata[luper,1] = permdata[k1,1];
permdata[luper,2] = permdata[k2,2];
permdata[luper,3] = permdata[k3,3];
permdata[luper,4] = permdata[k4,4];
permdata[k1,1] = d1;
permdata[k2,2] = d2;
permdata[k3,3] = d3;
permdata[k3,4] = d4;
end;

idv12 = permdata[,1];
mod12 = permdata[,2];
mod22 = permdata[,3];
dv12  = permdata[,4];

/* Moderated Regression */
x12 = idv12  # mod12;
x22 = idv12  # mod22;
x32 = mod12  # mod22;
x42 = idv12  # mod12 # mod22;
cr2 = corr( idv12 || mod12 || mod22 || x12 || x22 || x32 || x42 || dv12 );
beta2 = inv(cr2[1:7,1:7]) * cr2[1:7,8];
r2all2  = t(beta2) * cr2[1:7,8];
r2main2 =  t( inv(cr2[1:6,1:6])*cr2[1:6,8])  *  cr2[1:6,8];
r2chXn2 = r2all2 - r2main2;
fsquare2 = (r2all2 - r2main2) / (1 - r2all2);
F2 = (r2all2-r2main2) / ((1-r2all2)/(bigN-7-1));
if F2 >= F then  counter = counter + 1;

/* Moderated Regression including quadratic terms */
idvq2 = idv12 # idv12;
mod1q2 = mod12 # mod12;
mod2q2 = mod22 # mod22;
crq2 = corr( idv12 || mod12 || mod22 || idvq2 || mod1q2 || mod2q2 || x12 ||
            x22 || x32 || x42 || dv12 );
betaq2 = inv(crq2[1:10,1:10]) * crq2[1:10,11];
r2allq2  = t(betaq2) * crq2[1:10,11];
r2mainq2 =  t( inv(crq2[1:9,1:9])*crq2[1:9,11] )  *  crq2[1:9,11];
Fq2 = (r2allq2-r2mainq2) / ((1-r2allq2)/(bigN-10-1));
if Fq2 >= Fq then  counterq = counterq + 1;

end;

/* significance level computation from Noreen (1989, p. 56) */
siglevel  = (counter  + 1) / (permutes + 1);
siglevq = (counterq + 1) / (permutes + 1);

print "Significance level of F (randomization test):   " siglevel [format=9.6];

print,,,, "Regression results for the equation that INCLUDES quadratic terms:";
print "Slope coefficient for IDV:                      " (bq[1,1]) [format=9.3];
print "Slope coefficient for MOD1:                     " (bq[1,2]) [format=9.3];
print "Slope coefficient for MOD2:                     " (bq[1,3]) [format=9.3];
print "Slope coefficient for the IDV quadratic term:   " (bq[1,4]) [format=9.3];
print "Slope coefficient for the MOD1 quadratic term:  " (bq[1,5]) [format=9.3];
print "Slope coefficient for the MOD2 quadratic term:  " (bq[1,6]) [format=9.3];
print "Slope coefficient for IDV * MOD1:               " (bq[1,7]) [format=9.3];
print "Slope coefficient for IDV * MOD2:               " (bq[1,8]) [format=9.3];
print "Slope coefficient for MOD1 * MOD2:              " (bq[1,9]) [format=9.3];
print "Slope coefficient for IDV * MOD1 * MOD2:        " (bq[1,10]) [format=9.3];
print "Intercept:                                      " aq [format=9.3];
print "R-squared change for the interaction term;      " r2chXnq [format=9.3];
print "f-squared effect size for the interaction term: " fsquareq [format=9.3];
print "F value for the interaction term:               " Fq [format=9.3];
print "Significance level of F (tabled value):         " pFq [format=9.6];
print "Specified number of randomized data sets:       " permutes;
print "Significance level of F (randomization test):   " siglevq [format=9.6];
quit;