* Moderated Regression For A 3-way Interaction Between Continuous Variables.


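****************  START OF TRIAL-RUN DATA  *****************************
   The following commands generate artificial data that can be used for
   a trial-run of the program: just run this whole file as it is;
   NEW FILE below replaces the active data set, so delete or skip this
   trial-run section before analyzing your own data with "FILE = *".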
* Build 200 artificial cases with three normal(1) predictors and one
  uniform(1) noise variable.
new file.
input program.
loop #case = 1 to 200.
compute idv  = normal(1).
compute mod1 = normal(1).
compute mod2 = normal(1).
compute e    = uniform(1).
end case.
end loop.
end file.
end input program.
* The DV combines main effects, two of the 2-way products, the 3-way product
  and the scaled noise variable.
compute dv = 8.7*idv - 1.49*mod1 + 2.2*mod2 + 1.23*idv*mod1 +
             3.1*idv*mod2 - 1.3*idv*mod1*mod2 + 300*e.
descriptives variables = all.
****************  END OF TRIAL-RUN DATA  ************************************


* Session settings for the run: MXLOOPS raises the cap on loop iterations,
  PRINTBACK=OFF suppresses the echo of the syntax in the output, WIDTH sets
  the output line width, and SEED fixes the random-number seed so that the
  randomization test below gives the same result on every run.
set mxloops=90000 printback=off width=80  seed = 1953125.
matrix.

* Specify the data to be analyzed on the GET statement below, as in the example;
  "FILE = * " will use the currently active SPSS data set;
  "FILE = C:\filename" will use the specified SPSS data file on your computer;
  On the GET statement, the data matrix must be named DATA (as in the example);
  Enter the names of the variables to be analyzed from your data set
  after "VAR = ", separated by commas, in this order: IDV, MOD1, MOD2, DV;
  the program reads columns 1 to 4 of DATA in exactly that order.

get data / file = * / var = idv, mod1, mod2, dv.


* Specify the number of chunks for the IDV; when optdes = 1 (below) the observed
  range of the IDV is divided into this many equal-width intervals, and the
  means of the scores in the top and bottom intervals are used as the high and
  low values for the optimal design.
compute chunkIDV = 5 .

* Specify the number of chunks for MOD1 (used in the same way as for the IDV).
compute chnkMOD1 = 5 .

* Specify the number of chunks for MOD2 (used in the same way as for the IDV).
compute chnkMOD2 = 5 .

* Specify the method of determining the highest & lowest values of the IDV,
  MOD1 & MOD2 for an optimal design;
  enter 1 to let the program do it automatically;
  enter 2 to use your own preferred values.
compute optdes = 1.

* If you entered 2 on the above "optdes =" statement, then enter your preferred
  values on the next six statements; when optdes = 1 these six values are
  overwritten by the program.

* Enter the lowest value for the IDV in an optimal design.
compute lowIDV  = 1.

* Enter the highest value for the IDV in an optimal design.
compute highIDV  = 5.

* Enter the lowest value for MOD1 in an optimal design.
compute lowMOD1  = 1.

* Enter the highest value for MOD1 in an optimal design.
compute highMOD1 = 5.

* Enter the lowest value for MOD2 in an optimal design.
compute lowMOD2  = 1.

* Enter the highest value for MOD2 in an optimal design.
compute highMOD2 = 5.

* Specify the # of randomized data sets for the randomization
  test of statistical significance;  recommendations:
  use 100 data sets for a trial run, but use 1000 or more for final results;
  the randomization p-value is (count + 1) / (permutes + 1), so with the value
  10 entered below it can never be smaller than 1/11.
compute permutes = 10.

* End of required user specifications.


* Split the DATA matrix into its four columns: IDV, MOD1, MOD2, DV.
compute idv  = data(:,1).
compute mod1 = data(:,2).
compute mod2 = data(:,3).
compute dv   = data(:,4).

print /title="Moderated Regression For A 3-way Interaction Between Continuous Variables:".

* Number of cases.
compute bigN = nrow(data).

* Center the three predictors on their own means (the DV is left as it is).
compute ctr  = csum({idv, mod1, mod2}) / bigN.
compute idv  = idv  - ctr(1,1).
compute mod1 = mod1 - ctr(1,2).
compute mod2 = mod2 - ctr(1,3).

* Range of each centered predictor, widened by a tiny amount at both ends,
  and the width of one chunk of that range.
compute tiny    = .00000001.
compute IDVmin  = cmin(idv)  - tiny.
compute IDVmax  = cmax(idv)  + tiny.
compute MOD1min = cmin(mod1) - tiny.
compute MOD1max = cmax(mod1) + tiny.
compute MOD2min = cmin(mod2) - tiny.
compute MOD2max = cmax(mod2) + tiny.
compute increm1 = (IDVmax  - IDVmin)  / chunkIDV.
compute increm2 = (MOD1max - MOD1min) / chnkMOD1.
compute increm3 = (MOD2max - MOD2min) / chnkMOD2.

* Moderated regression WITHOUT quadratic terms.
* Columns of DATAM: 1-3 = IDV, MOD1, MOD2; 4-6 = the 2-way products;
  7 = the 3-way product; 8 = DV.
compute x1 = idv  &* mod1.
compute x2 = idv  &* mod2.
compute x3 = mod1 &* mod2.
compute x4 = x1   &* mod2.
compute datam = {idv, mod1, mod2, x1, x2, x3, x4, dv}.

* Means, SDs & correlations from the sums of squares and cross-products
  (Bernstein, p. 77-79).
compute colsum = t(csum(datam)).
compute mn     = t(colsum) / bigN.
compute sscp   = t(datam) * datam  -  (1/bigN) * (colsum) * t(colsum).
compute vcv    = sscp * (1/(bigN-1)).
compute sdcol  = sqrt(diag(vcv)).
compute sd     = t(sdcol).
compute sdinv  = inv(mdiag(sdcol)).
compute cr     = sdinv * vcv * sdinv.

* Standardized weights, raw-score slopes and intercept of the full equation.
compute rxy  = cr(1:7,8).
compute beta = inv(cr(1:7,1:7)) * rxy.
compute b    = (sd(1,8) &/ sd(1,1:7))  &*  t(beta).
compute a    = mn(1,8) - rsum( mn(1,1:7) &* b ).

* R-squared with and without the 3-way product, and the F test of the change.
compute r2all   = t(beta) * rxy.
compute rxyred  = cr(1:6,8).
compute r2main  = t( inv(cr(1:6,1:6)) * rxyred ) * rxyred.
compute r2chXn  = r2all - r2main.
compute dferror = bigN - 7 - 1.
compute F       = r2chXn / ((1 - r2all) / dferror).
compute pF      = 1 - fcdf(F,1,dferror).

* Moderated regression that also includes the three quadratic terms.
* Columns of DATAMQ: 1-3 = IDV, MOD1, MOD2; 4-6 = their squares;
  7-9 = the 2-way products; 10 = the 3-way product; 11 = DV.
compute idvq   = idv  &* idv.
compute mod1q  = mod1 &* mod1.
compute mod2q  = mod2 &* mod2.
compute datamq = {idv, mod1, mod2, idvq, mod1q, mod2q, x1, x2, x3, x4, dv}.

* Means, SDs & correlations from the sums of squares and cross-products
  (Bernstein, p. 77-79).
compute colsumq = t(csum(datamq)).
compute mnq     = t(colsumq) / bigN.
compute sscpq   = t(datamq) * datamq  -  (1/bigN) * (colsumq) * t(colsumq).
compute vcvq    = sscpq * (1/(bigN-1)).
compute sdcolq  = sqrt(diag(vcvq)).
compute sdq     = t(sdcolq).
compute sdinvq  = inv(mdiag(sdcolq)).
compute crq     = sdinvq * vcvq * sdinvq.

* Standardized weights, raw-score slopes and intercept of the full equation.
compute rxyq  = crq(1:10,11).
compute betaq = inv(crq(1:10,1:10)) * rxyq.
compute bq    = (sdq(1,11) &/ sdq(1,1:10))  &*  t(betaq).
compute aq    = mnq(1,11) - rsum( mnq(1,1:10) &* bq ).

* R-squared with and without the 3-way product, and the F test of the change.
compute r2allq   = t(betaq) * rxyq.
compute rxyredq  = crq(1:9,11).
compute r2mainq  = t( inv(crq(1:9,1:9)) * rxyredq ) * rxyredq.
compute r2chXnq  = r2allq - r2mainq.
compute dferrorq = bigN - 10 - 1.
compute Fq       = r2chXnq / ((1 - r2allq) / dferrorq).
compute pFq      = 1 - fcdf(Fq,1,dferrorq).

* f-squared = R-squared change for the 3-way product divided by the
  unexplained proportion of criterion variance (A & W, 1991, p. 157);
  computed for the equations without and with the quadratic terms.
compute fsquare  = (r2all  - r2main)  / (1 - r2all).
compute fsquareq = (r2allq - r2mainq) / (1 - r2allq).

* Mean square error of the equation without quadratic terms (Darlington p 121):
  SSE = sum of squared DV scores minus coefficients * t(XX) * DV, where XX is
  the predictor matrix with a leading column of ones.
compute xx   = { make(bigN,1,1), idv, mod1, mod2, x1, x2, x3, x4 }.
compute coef = { a; t(b) }.
compute sse  = t(dv) * dv  -  t(coef) * t(xx) * dv.
compute mse  = sse / ( bigN - 7 - 1).

* Vxzxz = the residual variance of the product term after controlling for the
  lower-order terms (p. 378); it is computed in 2 ways in this program;
  method 1 = the MSE that results from regressing the product on the other
  predictors (p. 381);
  method 2 = from the full moderated regression equation, as
  MSE / (n * squared standard error of the product coefficient) (p. 381).

* Method 2 (yields slightly different results in unusual situations);
  standard error formula from Cohen & Cohen, 2003, p. 632.
* NOTE(review): the formula quoted above divides by n, while the statement
  below uses the residual df (bigN - 7 - 1); confirm which one is intended.
compute dfres   = bigN - 7 - 1.
compute Rijm1   = inv(cr(1:7,1:7)).
compute sdratio = sd(1,8) &/ sd(1,7).
compute SExn    = sdratio * sqrt( (1 - r2all) / dfres ) * sqrt( Rijm1(7,7) ).
compute Vxzxz2  = mse / ( dfres * SExn&**2 ).

* Method 1: Vxzxz as the MSE from regressing the 3-way product (column 7)
  on the three predictors and the three 2-way products (columns 1-6).
compute datam2 = { idv, mod1, mod2, x1, x2, x3, x4}.

* Means, SDs & correlations from the sums of squares and cross-products
  (Bernstein, p. 77-79).
compute colsum2 = t(csum(datam2)).
compute mn2     = t(colsum2) / bigN.
compute sscp2   = t(datam2) * datam2  -  (1/bigN) * (colsum2) * t(colsum2).
compute vcv2    = sscp2 * (1/(bigN-1)).
compute sdcol2  = sqrt(diag(vcv2)).
compute sd2     = t(sdcol2).
compute sdinv2  = inv(mdiag(sdcol2)).
compute cr2     = sdinv2 * vcv2 * sdinv2.

* Raw-score slopes and intercept for predicting the 3-way product.
compute beta2 = inv(cr2(1:6,1:6)) * cr2(1:6,7).
compute b2    = (sd2(1,7) &/ sd2(1,1:6))  &*  t(beta2).
compute a2    = mn2(1,7) - rsum( mn2(1,1:6) &* b2).

* Residual mean square of that regression (Darlington p 121).
compute xx2    = { make(bigN,1,1), idv, mod1, mod2, x1, x2, x3 }.
compute coef2  = { a2; t(b2) }.
compute sse2   = t(x4) * x4  -  t(coef2) * t(xx2) * x4.
compute mse2   = sse2 / ( bigN - 6 -1).
compute Vxzxz1 = mse2.


* PRE, & Relative efficiency / optimal design values.

do if ( optdes = 1).
* Automatic optimal-design values: the high (low) value of each predictor is
  the mean of its scores that fall in the top (bottom) chunk of its range,
  where one chunk is the range divided by the specified number of chunks.
* Each comparison yields a column of 1s (score is in the chunk) and 0s, so
  the sum of score &* flag divided by the sum of the flags is the chunk mean;
  this replaces a case-by-case loop that grew six vectors by concatenation.
* The largest (smallest) score is always inside the top (bottom) chunk as long
  as the increment is positive, so the divisors are never zero in that case.
compute selhi = (idv  > (IDVmax  - increm1)).
compute sello = (idv  < (IDVmin  + increm1)).
compute highIDV   = csum(idv  &* selhi) / csum(selhi).
compute lowIDV    = csum(idv  &* sello) / csum(sello).
compute selhi = (mod1 > (MOD1max - increm2)).
compute sello = (mod1 < (MOD1min + increm2)).
compute highMOD1  = csum(mod1 &* selhi) / csum(selhi).
compute lowMOD1   = csum(mod1 &* sello) / csum(sello).
compute selhi = (mod2 > (MOD2max - increm3)).
compute sello = (mod2 < (MOD2min + increm3)).
compute highMOD2  = csum(mod2 &* selhi) / csum(selhi).
compute lowMOD2   = csum(mod2 &* sello) / csum(sello).
end if.

* Maximum value of Vxzxz (p. 381, 383): the product of the squared half-ranges
  between the high and low optimal-design values of the three predictors.
compute hrIDV    = (highIDV  - lowIDV)  / 2.
compute hrMOD1   = (highMOD1 - lowMOD1) / 2.
compute hrMOD2   = (highMOD2 - lowMOD2) / 2.
compute maxVxzxz = (hrIDV &**2) * (hrMOD1 &**2) * (hrMOD2 &**2).

* Relative efficiency = observed Vxzxz (method 1) relative to its maximum.
compute releffic = Vxzxz1 / maxVxzxz.

* PRE, the squared partial correlation = proportional reduction in error
  (p 377, 384) = the model improvement due to adding the product term;
  PRE2 is the same quantity for an optimal design (p 384), obtained by
  replacing the observed Vxzxz with its maximum.
compute b7sq = b(1,7) &**2.
compute PRE  = 1 / ( 1 + ( mse / (b7sq * Vxzxz1) ) ).
compute PRE2 = 1 / ( 1 + ( mse / (b7sq * maxVxzxz) ) ).


* Printout of the descriptive statistics, the optimal-design quantities and
  the regression results for the equation without quadratic terms.
* The program title is already printed once before the computations start,
  so it is not printed a second time here.
* The predictors are centered before these statistics are computed, so their
  means, lowest and highest scores are labelled as centered values; the
  lowest and highest scores also carry the tiny widening of the range;
  when optdes = 1 the optimal-design values are in centered units as well.
print bigN /title="Sample Size".
print mn(1,1) /format "f9.3"/title="IDV Mean (after centering): ".
print sd(1,1) /format "f9.3" /title="IDV Standard Deviation:".
print IDVmin  /format "f9.3" /title="IDV Lowest Score (centered):".
print IDVmax  /format "f9.3" /title="IDV Highest Score (centered):".
print mn(1,2) /format "f9.3" /title="MOD1 Mean (after centering): ".
print sd(1,2) /format "f9.3" /title="MOD1 Standard Deviation:".
print MOD1min /format "f9.3" /title="MOD1 Lowest Score (centered):".
print MOD1max /format "f9.3" /title="MOD1 Highest Score (centered):".
print mn(1,3) /format "f9.3" /title="MOD2 Mean (after centering): ".
print sd(1,3) /format "f9.3" /title="MOD2 Standard Deviation:".
print MOD2min /format "f9.3" /title="MOD2 Lowest Score (centered):".
print MOD2max /format "f9.3" /title="MOD2 Highest Score (centered):".
print chunkIDV /title="Specified # of chunks for the IDV".
print chnkMOD1 /title="Specified # of chunks for the MOD1".
print chnkMOD2 /title="Specified # of chunks for the MOD2".
print lowIDV /format "f9.3" /title="IDV Low  Value for an Optimal Design:".
print highIDV /format "f9.3" /title="IDV High Value for an Optimal Design:".
print lowMOD1 /format "f9.3" /title="MOD1 Low  Value for an Optimal Design:".
print highMOD1 /format "f9.3" /title="MOD1 High Value for an Optimal Design:".
print lowMOD2 /format "f9.3" /title="MOD2 Low  Value for an Optimal Design:".
print highMOD2 /format "f9.3" /title="MOD2 High Value for an Optimal Design:".
print Vxzxz1 /format "f9.3" /title="Vxzxz1 (residual variance of the product term)".
print Vxzxz2 /format "f9.3" /title="Vxzxz2 (residual variance of the product term)".
print maxVxzxz /format "f9.3" /title="maxVxzxz (maximum possible value of Vxzxz)".
print releffic /format "f9.3" /title="Relative Efficiency (Vxzxz / maxVxzxz)".
print PRE /format "f9.3" /title="PRE (Proportional Reduction in Error)".
print PRE2 /format "f9.3" /title="PRE for the optimal design".
print /space = 2 /title='Regression results for the equation that DOES NOT include quadratic terms:'.
print b(1,1) /format "f9.3" /title="Slope coefficient for IDV:".
print b(1,2) /format "f9.3" /title="Slope coefficient for MOD1:".
print b(1,3) /format "f9.3" /title="Slope coefficient for MOD2:".
print b(1,4) /format "f9.3" /title="Slope coefficient for IDV * MOD1:".
print b(1,5) /format "f9.3" /title="Slope coefficient for IDV * MOD2:".
print b(1,6) /format "f9.3" /title="Slope coefficient for MOD1 * MOD2:".
print b(1,7) /format "f9.3" /title="Slope coefficient for IDV * MOD1 * MOD2:".
print a /format "f9.3" /title="Intercept:".
print r2chXn /format "f9.3" /title="R-squared change for the interaction term".
print fsquare /format "f9.3" /title="f-squared effect size for the interaction term".
print F /format "f9.3" /title="F value for the interaction term".
print pF /format "f9.3" /title="Significance level of F (tabled value)".
print permutes /format "f9.3" /title="Specified number of randomized data sets".


* Analyses of the randomized data: each replication shuffles the four data
  columns independently, refits both equations, and counts how often the
  F for the 3-way product in the shuffled data reaches the observed F.
compute counter = 0.
compute counterq = 0.
loop #nperms = 1 to permutes.

*  The raw data permutations are based on column-wise random shufflings
*  of the values in the raw data matrix using Castellan's (1992, 
*  BRMIC, 24, 72-77) algorithm; The distributions of the original 
*  raw variables are exactly preserved in the shuffled versions used
*  in the parallel analyses.

* Every replication starts again from the unshuffled data matrix.
compute permdata = { idv, mod1, mod2, dv }.
loop #luper = 1 to (bigN -1).
* Draw, separately for each column, a random row index from #luper to bigN
  and swap that row with row #luper.
compute k1 = trunc( (bigN - #luper + 1) * uniform(1,1) + 1 )  + #luper - 1.
compute k2 = trunc( (bigN - #luper + 1) * uniform(1,1) + 1 )  + #luper - 1.
compute k3 = trunc( (bigN - #luper + 1) * uniform(1,1) + 1 )  + #luper - 1.
compute k4 = trunc( (bigN - #luper + 1) * uniform(1,1) + 1 )  + #luper - 1.
* The swap temporaries have their own names so that they do not overwrite
  or share a name with the matrices called d2 and d3 in this program.
compute sw1 = permdata(#luper,1).
compute sw2 = permdata(#luper,2).
compute sw3 = permdata(#luper,3).
compute sw4 = permdata(#luper,4).
compute permdata(#luper,1) = permdata(k1,1).
compute permdata(#luper,2) = permdata(k2,2).
compute permdata(#luper,3) = permdata(k3,3).
compute permdata(#luper,4) = permdata(k4,4).
compute permdata(k1,1) = sw1.
compute permdata(k2,2) = sw2.
compute permdata(k3,3) = sw3.
compute permdata(k4,4) = sw4.
end loop.
compute idv12 = permdata(:,1).
compute mod12 = permdata(:,2).
compute mod22 = permdata(:,3).
compute dv12  = permdata(:,4).

* Moderated Regression on the shuffled data; only the quantities needed for
  the F of the 3-way product are computed.
compute x12 = idv12  &* mod12.
compute x22 = idv12  &* mod22.
compute x32 = mod12  &* mod22.
compute x42 = idv12  &* mod12 &* mod22.
compute datam3 = {idv12, mod12, mod22, x12, x22, x32, x42, dv12}.
compute rawsp3 = t(datam3) * datam3 .
compute rsums3 = t(csum(datam3)).
compute corsp3 = rawsp3 - (1/bigN) * (rsums3) * t(rsums3) .
compute vcv3 = corsp3 * (1/(bigN-1)).
compute d3 = inv(mdiag(sqrt(diag(vcv3)))).
compute cr3 = d3 * vcv3 * d3.
compute beta3 = inv(cr3(1:7,1:7)) * cr3(1:7,8).
compute r2all3  = t(beta3) * cr3(1:7,8).
compute r2main3 =  t(( inv(cr3(1:6,1:6))*cr3(1:6,8)))  *  cr3(1:6,8).
compute F3 = (r2all3-r2main3) / ((1-r2all3)/(bigN-7-1)).
do if (F3 >= F).
compute  counter = counter + 1.
end if.

* Moderated Regression including quadratic terms on the shuffled data; the
  means, SDs, slopes and intercept are not needed for the F and are skipped.
compute idvq2 = idv12  &* idv12.
compute mod1q2 = mod12  &* mod12.
compute mod2q2 = mod22 &* mod22.
compute datamq2 = {idv12, mod12, mod22, idvq2, mod1q2, mod2q2, x12, x22, x32, x42, dv12}.
* correlation matrix (Bernstein, p. 77-79).
compute rawspq2 = t(datamq2) * datamq2 .
compute rsumsq2 = t(csum(datamq2)).
compute corspq2 = rawspq2 - (1/bigN) * (rsumsq2) * t(rsumsq2) .
compute vcvq2 = corspq2 * (1/(bigN-1)).
compute dq2 = inv(mdiag(sqrt(diag(vcvq2)))).
compute crq2 = dq2 * vcvq2 * dq2.
compute betaq2 = inv(crq2(1:10,1:10)) * crq2(1:10,11).
compute r2allq2  = t(betaq2) * crq2(1:10,11).
compute r2mainq2 =  t(( inv(crq2(1:9,1:9))*crq2(1:9,11)))  *  crq2(1:9,11).
compute Fq2 = (r2allq2-r2mainq2) / ((1-r2allq2)/(bigN-10-1)).
do if (Fq2 >= Fq).
compute  counterq = counterq + 1.
end if.

end loop.

* Randomization significance levels (Noreen, 1989, p. 56): the number of
  randomized data sets whose F reached the observed F, plus 1, divided by the
  number of randomized data sets plus 1;
  siglevel is for the equation without quadratic terms and siglevq is for
  the equation that includes them.
compute siglevel  = (counter  + 1) / (permutes + 1).
compute siglevq = (counterq + 1) / (permutes + 1).

* This first line completes the printout for the equation without quadratic
  terms, which was started before the randomization loop.
print siglevel /format "f9.3" /title="Significance level of F (randomization test):".
* Printout for the equation that includes the quadratic terms; the columns
  of bq are in the order IDV, MOD1, MOD2, the three squares, the three
  2-way products, and the 3-way product.
print /space = 2 /title='Regression results for the equation that INCLUDES quadratic terms:'.
print bq(1,1) /format "f9.3" /title="Slope coefficient for IDV:".
print bq(1,2) /format "f9.3" /title="Slope coefficient for MOD1:".
print bq(1,3) /format "f9.3" /title="Slope coefficient for MOD2:".
print bq(1,4) /format "f9.3" /title="Slope coefficient for IDV quadratic term:".
print bq(1,5) /format "f9.3" /title="Slope coefficient for MOD1 quadratic term:".
print bq(1,6) /format "f9.3" /title="Slope coefficient for MOD2 quadratic term:".
print bq(1,7) /format "f9.3" /title="Slope coefficient for IDV * MOD1:".
print bq(1,8) /format "f9.3" /title="Slope coefficient for IDV * MOD2:".
print bq(1,9) /format "f9.3" /title="Slope coefficient for MOD1 * MOD2:".
print bq(1,10) /format "f9.3" /title="Slope coefficient for IDV * MOD1 * MOD2:".
print aq /format "f9.3" /title="Intercept:".
print r2chXnq /format "f9.3" /title="R-squared change for the interaction term".
print fsquareq /format "f9.3" /title="f-squared effect size for the interaction term".
print Fq /format "f9.3" /title="F value for the interaction term".
print pFq /format "f9.3" /title="Significance level of F (tabled value)".
print permutes /format "f9.3" /title="Specified number of randomized data sets".
print siglevq /format "f9.3" /title="Significance level of F (randomization test):".

end matrix.