* SIMPLE-2grps: For interactions between a continuous 
                IDV & a dichotomous Moderator.


****************  START OF TRIAL-RUN DATA  *****************************
   The following commands generate artificial data that can be used for
   a trial-run of the program.  Just run this whole file.  
* Trial-run data: two groups of 50 cases, each case with a random
  normal idv (SD 10) and a random normal dv component (SD 10) that
  is shifted down by 10 in group 1 and up by 10 in group 2.
new file.
input program.
loop #a=1 to 50.
compute group=1.
compute idv = normal (10).
compute dv  = normal (10)  - 10.
end case.
end loop.
loop #a=1 to 50.
compute group=2.
compute idv = normal (10).
compute dv  = normal (10)  + 10.
end case.
end loop.
end file.
end input program.

* Mix idv into dv with opposite signs in the two groups so that the
  idv-dv slope differs by group, i.e., an interaction is built in;
  The -.45 is parenthesized before squaring because exponentiation
  is evaluated before unary minus, so the unparenthesized form
  scaled the group 2 random component by sqrt(1 + .45**2) rather
  than by sqrt(1 - .45**2) as in group 1.
if group=1   dv=idv *  1.45 + dv * sqrt(1 -  .45**2).
if group=2   dv=idv * -1.45 + dv * sqrt(1 - (-.45)**2).
descriptives var = all.

* Moderated regression on the artificial data: dummy-code group,
  form the idv-by-dummy product, enter idv and dum, then test the
  product term x.
recode group (1=0) (2=1) into dum.
compute x = idv * dum.
regression
  /variables  = idv dum x dv
  /statistics = defaults zpp bcov
  /dependent  = dv
  /method     = enter idv dum
  /method     = test (x) .

****************  END OF TRIAL-RUN DATA  ************************************.




* This version of the program reads and processes a raw data file,
  containing the variable scores. (In contrast, the previous  
  version of the program read and processed saved matrix data).

* For analyses of your own data, it may be easiest to create the
  following three variables in your data file: idv, group, & dv
  e.g., compute idv = name of the independent variable;
  e.g., compute group = name of the group variable;
  e.g., compute dv  = name of the dependent variable;  
  There must be no missing values.

* The values of the group variable must be either "1" or "2";
  no other values are recognized by the program.

* The program automatically computes dummy codes & product terms.

* The syntax lines below must be run as a group, i.e., run all of
   the commands between the MATRIX and END MATRIX
   commands at once, including the MATRIX and END MATRIX commands.

* Session settings for the run, followed by the start of the MATRIX
  procedure.
set mxloops   = 999999
    printback = off
    length    = none
    width     = 100.
matrix.

* The GET command below reads the data file & creates a data
  matrix ("datamat"). Enter the name of the dataset (and the
  file path) on the GET command after "file=".  Using a "*"
  instead of a file name & path causes the program to use
  the currently active SPSS data set.

* Then enter the names of the variables after "variables =" on
  the GET command.  Only three variable names are permitted,
  and they must appear in the following order: idv, group, dv.

* If the name of the idv in your dataset is something other
  than "idv", then enter the correct name in the place of "idv"
  after "variables =".

* If the name of the group variable in your dataset is
  something other than "group", then enter the correct name in
  the place of "group" after "variables =". The values of the
  group variable must be either "1" or "2"; 
  No other values are recognized by the program.

* If the name of the dv in your dataset is something other than
  "dv", then enter the correct name in the place of "dv" 
  after "variables =".

get datamat
  / file      = *
  / variables = idv, group, dv
  / missing   = omit.

* Dummy codes & product terms are computed by the program itself.

* Data for plots of the simple regression lines are generated by the
  program;  the IDV range runs from 2 SDs below to 2 SDs above the
  IDV mean (taken within each group);  to use other low and high
  levels, change the value of multiIDV from "2.0" to whatever
  multiple of the SD you prefer.

compute multiIDV = 2.0  .

****************** End of User Specifications *********************.

* Build the analysis matrix: col 1 = IDV, col 2 = dummy code
  (group - 1), col 3 = IDV x dummy product, col 4 = DV.
compute xcol  = datamat(:,1).
compute dcode = datamat(:,2) - 1.
compute datam = { xcol, dcode, (xcol &* dcode), datamat(:,3) }.

* Full-sample n, means, SDs & correlation matrix (Bernstein, p. 77-79).
compute n     = nrow(datam).
compute rsums = t(csum(datam)).
compute mn    = t(rsums) / n.
compute rawsp = t(datam) * datam.
compute corsp = rawsp - (1/n) * (rsums) * t(rsums).
compute vcv   = corsp * (1/(n-1)).
compute sd    = t(sqrt(diag(vcv))).
compute d     = inv(mdiag(sd)).
compute cr    = d * vcv * d.

* Split the cases by dummy code into two-column (IDV, DV) matrices;
  each matrix starts with a placeholder row of -9999 that is removed
  after the loop.
compute datagrp1 = make(1, 2, -9999).
compute datagrp2 = make(1, 2, -9999).
loop #i = 1 to n.
compute pair = { datam(#i,1), datam(#i,4) }.
do if (datam(#i,2) = 0).
compute datagrp1 = { datagrp1; pair }.
else if (datam(#i,2) = 1).
compute datagrp2 = { datagrp2; pair }.
end if.
end loop.
compute datagrp1 = datagrp1(2:nrow(datagrp1),:).
compute datagrp2 = datagrp2(2:nrow(datagrp2),:).

* Group 1: n, means, covariance matrix, SDs & correlations of (IDV, DV).
compute n1     = nrow(datagrp1).
compute rsums1 = t(csum(datagrp1)).
compute mn1    = t(rsums1) / n1.
compute rawsp1 = t(datagrp1) * datagrp1.
compute corsp1 = rawsp1 - (1/n1) * (rsums1) * t(rsums1).
compute vcv1   = corsp1 * (1/(n1-1)).
compute sd1    = t(sqrt(diag(vcv1))).
compute d1     = inv(mdiag(sd1)).
compute cr1    = d1 * vcv1 * d1.

* Group 2: the same quantities as for Group 1.
compute n2     = nrow(datagrp2).
compute rsums2 = t(csum(datagrp2)).
compute mn2    = t(rsums2) / n2.
compute rawsp2 = t(datagrp2) * datagrp2.
compute corsp2 = rawsp2 - (1/n2) * (rsums2) * t(rsums2).
compute vcv2   = corsp2 * (1/(n2-1)).
compute sd2    = t(sqrt(diag(vcv2))).
compute d2     = inv(mdiag(sd2)).
compute cr2    = d2 * vcv2 * d2.

* Regression of the DV on the IDV, dummy code & product term:
  standardized and raw weights, intercept, and the R-squared-change
  F test for the product term on 1 and n-3-1 df.
compute rxx      = cr(1:3,1:3).
compute rxy      = cr(1:3,4).
compute beta     = inv(rxx) * rxy.
compute b        = (sd(1,4) &/ sd(1,1:3))   &* t(beta).
compute a        = mn(1,4) - ( rsum ( mn(1,1:3) &* b ) ).
compute r2       = t(beta) * rxy.
compute betamain = inv(cr(1:2,1:2)) * cr(1:2,4).
compute r2main   = t(betamain) * cr(1:2,4).
compute r2chXn   = r2 - r2main.
compute dferror  = n - 3 - 1.
compute fsquared = r2chXn / (1 - r2).
compute F        = r2chXn / ((1-r2)/dferror).
compute pF       = 1 - fcdf(F,1,dferror).

print {r2chXn,F,{1},dferror,fsquared,pF}
  /format="f12.3"
  /title="Coefficients for the Interaction"
  /clabels="Rsq. ch." "F" "df num." "df denom." "fsquared" "Sig. F".
print {t(b),beta}
  /format="f12.3"
  /title="Beta weights for the full equation:"
  /rlabels="idv" "dum" "Xn"
  /clabels="raw b" "std.beta"  .
print a
  /format="f12.3"
  /title="The intercept is:" .


* Simple slopes of the DV on the IDV within each group;
  with dum = 0 for Group 1 and dum = 1 for Group 2, the slope is
  b(idv) + b(Xn)*dum and the intercept is a + b(dum)*dum.
compute dum = { 0 ; 1 }.
compute slopes={ b(1,1)+b(1,3)*dum(1,1) ; b(1,1)+b(1,3)*dum(2,1) }.
compute aslopes={ b(1,2)*dum(1,1)+a ; b(1,2)*dum(2,1)+a }.

* Residual mean square = residual SS / (n-3-1), where the residual SS
  is (n-1) * var(DV) * (1 - Rsq) because sd holds n-1 based SDs;
  the former n/(n-3) multiplier did not agree with the n-3-1 error
  df that is used for the t tests below.
compute dfs =  n-3-1 .
compute mse = ((n-1)/dfs)*(sd(1,4)**2)*(1-r2).

* Sb = covariance matrix of the raw b weights, i.e., MSE times the
  inverse of the deviation SSCP matrix of the three predictors;
  each SE is the square root of w * Sb * t(w) with w = {1, 0, dum}.
compute Sb=mse*inv((mdiag(sd(1,1:3))*cr(1:3,1:3)*mdiag(sd(1,1:3)))*(n-1)).
compute SEslopes={ (sqrt ( {1,0,dum(1,1)} * Sb * t({1,0,dum(1,1)}) )) ;
                   (sqrt ( {1,0,dum(2,1)} * Sb * t({1,0,dum(2,1)}) ))   }.
compute tslopes = slopes &/ SEslopes .
compute df = { dfs ; dfs }.
compute pslopes = (1 - tcdf(abs(tslopes),dfs)) * 2.

* Standardized slopes & SEs: raw values times SD(IDV)/SD(DV) within
  each group.
compute zslopes  = slopes &*  { sd1(1,1)/sd1(1,2) ; sd2(1,1)/sd2(1,2) }.
compute zSE = SEslopes &*  { sd1(1,1)/sd1(1,2) ; sd2(1,1)/sd2(1,2) }.

* Two-tailed p = .05 critical t values by df (from Darlington p 516 &
  Howell 87 p 586);  the lookup takes the entry for the largest
  tabled df that does not exceed dfs, and tabledT stays at 0 when
  no entry matches.
compute dfcut = {
   1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
  13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28,
  30, 32, 34, 36, 38, 40, 43, 46, 49, 52, 56, 60,
  65, 70, 75, 80, 85, 90, 95, 100, 110, 120, 130, 150,
  175, 200, 250, 300, 400, 500, 600, 700, 800, 900, 1000, 1000000000 }.
compute tcrit = {
  12.706, 4.303, 3.182, 2.776, 2.571, 2.447,
   2.365, 2.306, 2.262, 2.228, 2.201, 2.179,
   2.160, 2.145, 2.131, 2.120, 2.110, 2.101,
   2.093, 2.086, 2.074, 2.064, 2.056, 2.048,
   2.042, 2.037, 2.032, 2.028, 2.024, 2.021,
   2.017, 2.013, 2.010, 2.007, 2.003, 2.000,
   1.997, 1.994, 1.992, 1.990, 1.988, 1.987,
   1.985, 1.984, 1.982, 1.980, 1.978, 1.976,
   1.974, 1.972, 1.969, 1.968, 1.966, 1.965,
   1.964, 1.963, 1.963, 1.963, 1.962, 1.962 }.
compute dft = { dfcut ; tcrit }.
compute tabledT = 0.
loop #k = 1 to 59.
do if (dfs ge dfcut(1,#k) and dfs < dfcut(1,#k+1)).
compute tabledT = tcrit(1,#k).
end if.
end loop if (tabledT > 0).

* Confidence limits for the standardized simple slopes.
compute halfwid  = tabledT &* zSE.
compute confidLo = zslopes - halfwid.
compute confidHi = zslopes + halfwid.

* Output: per-group intercepts, raw slopes & t tests, then the
  standardized slopes with their confidence limits, then the IDV
  value at which the two simple regression lines cross.
compute slptab = { aslopes , slopes , tslopes , df , pslopes }.
print slptab
  /format="f12.3"
  /space=2
  /title="Simple Slope Coefficients for the DV on the IDV "
  + "for Individual Groups:"
  /rlabels="Group 1"  "Group 2"
  /clabels="a" "raw b" "t-test" "df" "Sig. T".

compute ztab = { zslopes , zSE , confidLo, confidHi }.
print ztab
  /format="f12.3"
  /title="Standardized Simple Slopes & 95% Confidence Intervals: "
  /rlabels="Group 1"  "Group 2"
  /clabels="std.beta" "SE" "95%  Low" "95%  Hi".

compute xcross = (aslopes(1,1)-aslopes(2,1)) / (slopes(2,1) - slopes(1,1)).
print xcross
  /format="f12.3"
  /title="The simple regression lines for Group 1 & Group 2"
  + " intersect at IDV ="
  /space=2.


* Simultaneous Regions of Significance: within-group deviation SSCP
  matrices, regression & pooled residual sums of squares, and the
  differences between the two simple intercepts and simple slopes.
compute dsd1   = mdiag(sd1(1,1:2)).
compute dsd2   = mdiag(sd2(1,1:2)).
compute sscp1  = dsd1 * cr1 * dsd1 * (n1-1).
compute sscp2  = dsd2 * cr2 * dsd2 * (n2-1).
compute ssreg1 = (sscp1(1,2)**2) / sscp1(1,1).
compute ssreg2 = (sscp2(1,2)**2) / sscp2(1,1).
compute ssresd = (sscp1(2,2)-ssreg1) + (sscp2(2,2)-ssreg2).
compute N      = n1 + n2.
compute df2    = N - 4.
compute aa     = aslopes(1,1) - aslopes(2,1).
compute bb     =  slopes(1,1) -  slopes(2,1).

* Critical F values by df, tabled at p = .05 (from Bernstein 88
  p 420);  the lookup takes the entry for the largest tabled df that
  does not exceed df2, and tabledF stays at 0 when no entry matches.
compute fdfcut = {
  13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
  23, 24, 25, 26, 27, 28, 29, 30, 32, 34,
  36, 38, 40, 42, 44, 46, 48, 50, 55, 60,
  65, 70, 80, 100, 125, 150, 200, 400, 1000, 1000000000 }.
compute fcrit = {
  3.80, 3.74, 3.68, 3.63, 3.59, 3.55, 3.52, 3.49, 3.47, 3.44,
  3.42, 3.40, 3.38, 3.37, 3.35, 3.34, 3.33, 3.32, 3.30, 3.28,
  3.26, 3.25, 3.23, 3.22, 3.21, 3.20, 3.19, 3.18, 3.17, 3.15,
  3.14, 3.13, 3.11, 3.09, 3.07, 3.06, 3.04, 3.02, 3.00, 2.99 }.
compute dfF = { fdfcut ; fcrit }.
compute tabledF = 0.
loop #k = 1 to 39.
do if (df2 ge fdfcut(1,#k) and df2 < fdfcut(1,#k+1)).
compute tabledF = fcrit(1,#k).
end if.
end loop if (tabledF > 0).

* Johnson-Neyman boundaries: roots of  jnA*X**2 + 2*jnB*X + jnC = 0;
  the sampling-variance terms are divided by the sum of squared IDV
  deviations within each group, sscp1(1,1) and sscp2(1,1);  the
  regression sums of squares (ssreg1, ssreg2) were used in those
  denominators before, which is not what the formula calls for;
  the coefficients are named jnA, jnB & jnC because names are not
  case sensitive here, so A and B would overwrite the intercept a
  and the raw weights b.
compute ssx1 = sscp1(1,1).
compute ssx2 = sscp2(1,1).
compute jnK  = (2*tabledF/(N-4)) * ssresd.
compute jnA  = (bb**2) - jnK * ((1/ssx1)+(1/ssx2)).
compute jnB  = (aa*bb) + jnK * ((mn1(1,1)/ssx1)+(mn2(1,1)/ssx2)).
compute jnC  = (aa**2) - jnK * ( (N/(n1*n2))+
               ((mn1(1,1)**2)/ssx1)+((mn2(1,1)**2)/ssx2) ).
compute jnD  = jnB**2 - jnA*jnC.

* Xlo and Xhi keep the -9999 flag unless there are two real roots
  and jnA is positive;  only then is the larger root the Hi value,
  which is the case the printed "< Lo Value & > Hi Value" message
  describes;  testing jnA directly also avoids dividing by zero.
compute Xlo = -9999.
compute Xhi = -9999.
do if ( (jnD gt 0) and (jnA gt 0) ).
compute Xhi = (-jnB + (sqrt(jnD)) ) / jnA.
compute Xlo = (-jnB - (sqrt(jnD)) ) / jnA.
end if.

* Output for the regions of significance;  a value of -9999 is the
  flag for boundaries that could not be computed.
print
  /title="Simultaneous Regions of Significance -- Johnson-Neyman Technique:"
  /space=2.
print
  /title="The regression lines for group comparisons are significantly different".
compute jnout = { xlo, xhi }.
print jnout
  /format="f12.3"
  /title="at IDV scores < Lo Value & > Hi Value:"
  /clabels="Lo Value" "Hi Value".
print
  /title="-9999 indicates that meaningful values could not be computed.".


* Data for the plot: predicted DV at multiIDV SDs below and above
  the IDV mean of each group, using the simple intercept and simple
  slope of that group;  the four points are printed and then saved
  as the active data set.
compute span1 = sd1(1,1) * multiIDV.
compute span2 = sd2(1,1) * multiIDV.
compute idv   = { mn1(1,1) - span1 ;
                  mn1(1,1) + span1 ;
                  mn2(1,1) - span2 ;
                  mn2(1,1) + span2 }.
compute group = { 1 ; 1 ; 2 ; 2 }.
compute bvec  = { slopes(1,1);  slopes(1,1);  slopes(2,1);  slopes(2,1) }.
compute avec  = { aslopes(1,1); aslopes(1,1); aslopes(2,1); aslopes(2,1) }.
compute dv    = (bvec &* idv) + avec.
compute data  = { group , idv , dv }.
print data
  /format="f12.3"
  /space=2
  /title="Data for simple slope plots:"
  /clabels="Group" "IDV" "DV" .
save data /outfile=* / var=group idv dv .

end matrix.

* The following PLOT command can be used instead of the GRAPH 
   command in SPSS version 11 and earlier.
* plot vsize=15/hsize=50/ format=contour(2) / plot=dv with idv by group.

* The SPSS GRAPH command has few options for controlling the graphs;
  Lines for groups sometimes have gaps, in which case you should assume
  that separated lines of the same color should be joined together;
  Enter the "Data for simple slope plots:" into a more flexible graphing
  program, such as DeltaGraph, for better displays.
  
* Line chart of mean dv by idv for each group, then a scatterplot of
  dv against idv marked by group, both drawn from the active data set.
graph
  /line = mean(dv) by idv by group .

graph
  /scatterplot = idv with dv by group .