options nocenter nodate nonumber linesize=100 pagesize=500; title; /* Moderated Regression For A 2-way Interaction Between Continuous Variables */ /**************** START OF TRIAL-RUN DATA ************************************ The following commands generate artificial data that can be used for a trial-run of the program. Just run this whole file. */ data trialrun; do cases = 1 to 200; idv = normal(1); moder = normal(1); e = ranuni(1); x = idv * moder; dv = 8.7*idv + -1.49*moder + 1.23*(idv * moder) + e*30; output;end; proc means data = trialrun; run; /**************** END OF TRIAL-RUN DATA ************************************/ proc iml; reset noname; /* Enter the name of the dataset on the following USE statement. i.e., Replace "trialrun" with the name of your dataset. */ use trialrun; /* Enter the names of the variables to be analyzed from your data set inside the curly brackets on the following READ statement. The order of the variable names must be : IDV, Moderator, DV */ read all var { idv moder dv } into data; /* Specify the number of chunks for the IDV */ chunkIDV = 7 ; /* Specify the number of chunks for the MOD */ chunkMOD = 7 ; /* Specify the method of determining highest & lowest values for the IDV & MOD /* for an optimal design /* Enter 1 to let the program do it automatically /* Enter 2 to use your own preferred values */ optdes = 1; /* If you entered 2 on the above "optdes =" statement, then enter your preferred /* values on the next four statements: */ /* Enter the lowest value for the IDV in an optimal design */ lowIDV = 1; /* Enter the highest value for the IDV in an optimal design */ highIDV = 5; /* Enter the lowest value for the MOD in an optimal design */ lowMOD = 1; /* Enter the highest value for the MOD in an optimal design */ highMOD = 5; /* Specify the # of randomized data sets for the randomization /* test of statistical significance. Recommendations: /* use 100 data sets for a trial run, but use 1000 or more for final results */ permutes = 100; /* End of required user specifications */ idv = data[,1]; moder = data[,2]; dv = data[,3]; show datasets; show contents; print,,,, 'Moderated Regression For A 2-way Interaction Between Continuous Variables:'; /* row sums module */ start rsum(matname); rsums =j(nrow(matname),1); do rows = 1 to nrow(matname); dumr = matname[rows,]; rsums[rows,1]=sum(dumr); end; return(rsums); finish; /* collumn sums module */ start csum(matname); csums =j(1,ncol(matname)); do cols = 1 to ncol(matname); dumc = matname[,cols]; csums[1,cols]=sum(dumc); end; return(csums); finish; /* column means module */ start cmean(matname); moyennes = matname[+,] / nrow(matname); return(moyennes); finish; /* column standard deviations module -- IML manual p 112 */ start cstd(matname); N = nrow(matname); matname = matname - repeat(cmean(matname),N,1); ss = matname[##,]; csd = sqrt(ss/(N-1)); return(csd); finish; /* correlation matrix module -- IML manual p 112 */ start corr(matname); n = nrow(matname); sum = matname[+,]; xpx = t(matname) * matname - t(sum) * sum / n; s = diag(1 / sqrt(vecdiag(xpx))); corr = s * xpx * s; return(corr); finish; /* sample size */ bigN = nrow(idv); /* centering the IDV & MOD */ idv = idv - cmean(idv); moder = moder - cmean(moder); /* 2-D frequency distribution. */ IDVmax = max(idv) + .00000001; IDVmin = min(idv) - .00000001; MODmax = max(moder) + .00000001; MODmin = min(moder) - .00000001; increm1 = (IDVmax - IDVmin) / chunkIDV; increm2 = (MODmax - MODmin) / chunkMOD; freqs = j(chunkIDV, chunkMOD, 0); do luper = 1 to bigN; IDVchunk = ceil( ( idv[luper,1] - IDVmin ) / increm1 ); MODchunk = ceil( ( moder[luper,1] - MODmin ) / increm2 ); freqs[ IDVchunk, MODchunk ] = freqs[ IDVchunk, MODchunk ] + 1; end; /* Moderated Regression */ x = idv # moder; cr = corr( idv || moder || x || dv ); beta = inv(cr[1:3,1:3]) * cr[1:3,4]; mn = cmean( idv || moder || x || dv ); sd = cstd ( idv || moder || x || dv ); b = (sd[1,4] / sd[1,1:3]) # t(beta); a = mn[1,4] - ( sum( mn[1,1:3] # b ) ); r2all = t(beta) * cr[1:3,4]; r2main = t( inv(cr[1:2,1:2])*cr[1:2,4] ) * cr[1:2,4]; r2chXn = r2all - r2main; fsquare = (r2all - r2main) / (1 - r2all); r2chXn = r2all - r2main; F = (r2all-r2main) / ((1-r2all)/(bigN-3-1)); dferror = bigN - 3 - 1; pF = 1 - probf(F,1,dferror); /* Moderated Regression including quadratic terms */ idvq = idv # idv; moderq = moder # moder; crq = corr( idv || moder || idvq || moderq || x || dv ); betaq = inv(crq[1:5,1:5]) * crq[1:5,6]; mnq = cmean( idv || moder || idvq || moderq || x || dv ); sdq = cstd( idv || moder || idvq || moderq || x || dv ); bq = (sdq[1,6] / sdq[1,1:5]) # t(betaq); aq = mnq[1,6] - ( sum( mnq[1,1:5] # bq ) ); r2allq = t(betaq) * crq[1:5,6]; r2mainq = t( inv(crq[1:4,1:4])*crq[1:4,6] ) * crq[1:4,6]; r2chXnq = r2allq - r2mainq; fsquareq = (r2allq - r2mainq) / (1 - r2allq); r2chXnq = r2allq - r2mainq; Fq = (r2allq-r2mainq) / ((1-r2allq)/(bigN-5-1)); dferrorq = bigN - 5 - 1; pFq = 1 - probf(Fq,1,dferrorq); /* f-squared = the proportion of systematic variance accounted for by the effect relative to unexplained variance in the criterion (A & W, 1991, p. 157) */ fsquare = (r2all - r2main) / (1 - r2all); fsquareq = (r2allq - r2mainq) / (1 - r2allq); /* mse Darlington p 121 */ xx = ( j(bigN,1,1) || idv || moder || x ); sse = t(dv) * dv - t( a // t(b)) * t(xx) * dv; mse = sse / ( bigN - 3 - 1); /* Vxzxz = the residual variance of the product XZ after controlling for X & Z (p. 378); /* 2 ways of computing Vxzxz /* method 1 = the MSE that results from regressing XZ on X & Z (p. 381) /* method 2 = using the full mod reg equation, = /* MSE / (n * stand. error of the estimate for the product##2) (p. 381) */ /* method 2: the full mod reg equation approach (yields slightly diff results in unusual sits) /* Cohen & Cohen, 2003, p. 632 */ Rijm1 = inv(cr[1:3,1:3]); SExn = ( sd[1,4] / sd[1,3] ) * sqrt( (1 - r2all) / ( bigN - 3 - 1) ) * sqrt( Rijm1[3,3] ); Vxzxz2 = mse / ( ( bigN - 3 - 1) * SExn##2 ); /* method 1: using the MSE that results from regressing XZ on X & Z approach */ cr2 = corr( idv || moder || x ); beta2 = inv(cr2[1:2,1:2]) * cr2[1:2,3]; sd2 = cstd( idv || moder || x ); mn2 = cmean( idv || moder || x ); b2 = (sd2[1,3] / sd2[1,1:2]) # t(beta2); a2 = mn2[1,3] - ( sum( mn2[1,1:2] # b2 ) ); xx2 = ( j(bigN,1,1) || idv || moder ); sse2 = t(x) * x - t( a2 // t(b2)) * t(xx2) * x; /* mse Darlington p 121 */ mse2 = sse2 / ( bigN - 2 -1); Vxzxz1 = mse2; /* PRE, & Relative efficiency / optimal design values */ if optdes = 1 then do; /* Using the mean scores in the top and bottoms chunks, based on the specified chunk sizes */ idvtop = -9999; idvbot = -9999; modtop = -9999; modbot = -9999; do luper = 1 to bigN; if idv[luper,1] > (IDVmax - increm1) then idvtop = ( idvtop // idv[luper,1] ); if idv[luper,1] < (IDVmin + increm1) then idvbot = ( idvbot // idv[luper,1] ); if moder[luper,1] > (MODmax - increm2) then modtop = ( modtop // moder[luper,1] ); if moder[luper,1] < (MODmin + increm2) then modbot = ( modbot // moder[luper,1] ); end; highIDV = cmean(idvtop[2:nrow(idvtop),1]); lowIDV = cmean(idvbot[2:nrow(idvbot),1]); highMOD = cmean(modtop[2:nrow(modtop),1]); lowMOD = cmean(modbot[2:nrow(modbot),1]); end; /* maximim value of Vxzxz (p. 381, 383) */ maxVxzxz = (( (highIDV - lowIDV) / 2 ) ##2 ) * (( (highMOD - lowMOD) / 2 ) ##2 ); /* Relative Efficiency */ releffic = Vxzxz1 / maxVxzxz; /* PRE squared partial correlation = proportional reduction in error (p 377, 384) */ /* = the model improvement due to adding the product term */ PRE = 1 / ( 1+ ( mse / (b[1,3]##2 * Vxzxz1) ) ); /* PRE for an optimal design p 384 */ PRE2 = 1 / ( 1+ ( mse / (b[1,3]##2 * maxVxzxz) ) ); print "Sample Size: " bigN; print "IDV Mean: " (mn[1,1]) [format=9.3]; print "IDV Standard Deviation: " (sd[1,1]) [format=9.3]; print "IDV Lowest Score: " IDVmin [format=9.3]; print "IDV Highest Score: " IDVmax [format=9.3]; print "MOD Mean: " (mn[1,2]) [format=9.3]; print "MOD Standard Deviation: " (sd[1,2]) [format=9.3]; print "MOD Lowest Score: " MODmin [format=9.3]; print "MOD Highest Score: " MODmax [format=9.3]; print "Specified # of chunks for the IDV: " chunkIDV; print "Specified # of chunks for the MOD: " chunkMOD; print "IDV Low Value for an Optimal Design: " lowIDV [format=9.3]; print "IDV High Value for an Optimal Design: " highIDV [format=9.3]; print "MOD Low Value for an Optimal Design: " lowMOD [format=9.3]; print "MOD High Value for an Optimal Design: " highMOD [format=9.3]; print "Joint Frequencies of the IDV & MOD:",, freqs; print "Vxzxz1 (residual variance of the product term): " Vxzxz1 [format=9.3]; print "Vxzxz2 (residual variance of the product term): " Vxzxz2 [format=9.3]; print "maxVxzxz (maximum possible value of Vxzxz): " maxVxzxz [format=9.3]; print "Relative Efficiency (Vxzxz / maxVxzxz): " releffic [format=9.3]; print "PRE (Proportional Reduction in Error): " PRE [format=9.3]; print "PRE for the optimal design: " PRE2 [format=9.3]; print,,,, "Regression results for the equation that DOES NOT include quadratic terms:"; print "Slope coefficient for the IDV: " (b[1,1]) [format=9.3]; print "Slope coefficient for the MOD: " (b[1,2]) [format=9.3]; print "Slope coefficient for the product term: " (b[1,3]) [format=9.3]; print "Intercept: " a [format=9.3]; print "R-squared change for the interaction term: " r2chXn [format=9.3]; print "f-squared effect size for the interaction term: " fsquare [format=9.3]; print "F value for the interaction term: " F [format=9.3]; print "Significance level of F (tabled value): " pF [format=9.6]; print "Specified number of randomized data sets: " permutes; /* Analyses of the randomized data */ counter = 0; counterq = 0; do nperms = 1 to permutes; /* The raw data permutations are based on column-wise random shufflings /* of the values in the raw data matrix using Castellan's (1992, /* BRMIC, 24, 72-77) algorithm; The distributions of the original /* raw variables are exactly preserved in the shuffled versions used /* in the parallel analyses. */ /* data matrix to permute */ permdata = ( idv || moder || dv ); do luper = 1 to (bigN -1); k1 = int( (bigN - luper + 1) * ranuni(1) + 1 ) + luper - 1; k2 = int( (bigN - luper + 1) * ranuni(1) + 1 ) + luper - 1; k3 = int( (bigN - luper + 1) * ranuni(1) + 1 ) + luper - 1; d1 = permdata[luper,1]; d2 = permdata[luper,2]; d3 = permdata[luper,3]; permdata[luper,1] = permdata[k1,1]; permdata[luper,2] = permdata[k2,2]; permdata[luper,3] = permdata[k3,3]; permdata[k1,1] = d1; permdata[k2,2] = d2; permdata[k3,3] = d3; end; idv2 = permdata[,1]; mod2 = permdata[,2]; dv2 = permdata[,3]; /* Moderated Regression */ x2 = idv2 # mod2; cr2 = corr( idv2 || mod2 || x2 || dv2 ); beta2 = inv(cr2[1:3,1:3]) * cr2[1:3,4]; r2all2 = t(beta2) * cr2[1:3,4]; r2main2 = t( inv(cr2[1:2,1:2])*cr2[1:2,4]) * cr2[1:2,4]; F2 = (r2all2-r2main2) / ((1-r2all2)/(bigN-3-1)); if F2 >= F then counter = counter + 1; /* Moderated Regression including quadratic terms */ idvq2 = idv2 # idv2; modq2 = mod2 # mod2; crq2 = corr( idv2 || mod2 || idvq2 || modq2 || x2 || dv2 ); betaq2 = inv(crq2[1:5,1:5]) * crq2[1:5,6]; r2allq2 = t(betaq2) * crq2[1:5,6]; r2mainq2 = t( inv(crq2[1:4,1:4])*crq2[1:4,6] ) * crq2[1:4,6]; Fq2 = (r2allq2-r2mainq2) / ((1-r2allq2)/(bigN-5-1)); if Fq2 >= Fq then counterq = counterq + 1; end; /* significance level computation from Noreen (1989, p. 56) */ siglevel = (counter + 1) / (permutes + 1); siglevq = (counterq + 1) / (permutes + 1); print "Significance level of F (randomization test): " siglevel [format=9.6]; print,,,, "Regression results for the equation that INCLUDES quadratic terms:"; print "Slope coefficient for the IDV: " (bq[1,1]) [format=9.3]; print "Slope coefficient for the MOD: " (bq[1,2]) [format=9.3]; print "Slope coefficient for the IDV quadratic term: " (bq[1,3]) [format=9.3]; print "Slope coefficient for the MOD quadratic term: " (bq[1,4]) [format=9.3]; print "Slope coefficient for the product term: " (bq[1,5]) [format=9.3]; print "Intercept: " aq [format=9.3]; print "R-squared change for the interaction term: " r2chXnq [format=9.3]; print "f-squared effect size for the interaction term: " fsquareq [format=9.3]; print "F value for the interaction term: " Fq [format=9.3]; print "Significance level of F (tabled value): " pFq [format=9.6]; print "Specified number of randomized data sets: " permutes; print "Significance level of F (randomization test): " siglevq [format=9.6]; /* saving data for plots */ freqsout = (-9999 || -9999 || -9999); do luper = 1 to chunkIDV; do lupec = 1 to chunkMOD; freqsout = ( freqsout // (luper || lupec || freqs[luper,lupec]) ); end;end; freqsout = freqsout[2:nrow(freqsout),]; create plotdata from freqsout[colname={"IDV" "MOD" "Freqs"}]; append from freqsout; quit; /* The following commands generate graphs of the bivariate frequency distribution. They require SAS/GRAPH. */ goptions reset=global gunit=pct border cback=white colors=(blue green red) ctext=black ftitle=swissb ftext=swiss htitle=4 htext=3 hpos=120 vpos=52 ; proc GCHART data = plotdata; block IDV / sumvar=Freqs group = MOD discrete; run; proc g3d data=plotdata; scatter idv*mod=Freqs / caxis=black;run; proc g3d data=plotdata; plot idv*mod=Freqs / caxis=black ;run;