options nocenter nodate nonumber linesize=100 pagesize=500; title; /* Moderated Regression For A 3-way Interaction Between Continuous Variables */ /**************** START OF TRIAL-RUN DATA ************************************ The following commands generate artificial data that can be used for a trial-run of the program. Just run this whole file. */ data trialrun; do cases = 1 to 200; idv = normal (1); mod1 = normal (1); mod2 = normal (1); e = ranuni (1); dv = 8.7*idv + -1.49*mod1 + 2.2*mod2 + 1.23*idv*mod1 + 3.1*idv*mod2 + -1.3*idv*mod1*mod2 + e*30; output;end; proc means data = trialrun;run; /**************** END OF TRIAL-RUN DATA ************************************/ proc iml; reset noname; /* Enter the name of the dataset on the following USE statement. i.e., Replace "trialrun" with the name of your dataset. */ use trialrun; /* Enter the names of the variables to be analyzed from your data set inside the curly brackets on the following READ statement. The order of the variable names must be : IDV, Mod1, Mod2, DV */ read all var { idv mod1 mod2 dv } into data; /* Specify the number of chunks for the IDV */ chunkIDV = 5 ; /* Specify the number of chunks for MOD1 */ chunkMOD1 = 5 ; /* Specify the number of chunks for MOD1 */ chunkMOD2 = 5 ; /* Specify the method of determining highest & lowest values for the IDV & MOD /* for an optimal design /* Enter 1 to let the program do it automatically /* Enter 2 to use your own preferred values */ optdes = 1; /* If you entered 2 on the above "optdes =" statement, then enter your preferred /* values on the next four statements: */ /* Enter the lowest value for the IDV in an optimal design */ lowIDV = 1; /* Enter the highest value for the IDV in an optimal design */ highIDV = 5; /* Enter the lowest value for MOD1 in an optimal design */ lowMOD1 = 1; /* Enter the highest value for MOD1 in an optimal design */ highMOD1 = 5; /* Enter the lowest value for MOD1 in an optimal design */ lowMOD2 = 1; /* Enter the highest value for MOD1 in an optimal design */ highMOD2 = 5; /* Specify the # of randomized data sets for the randomization /* test of statistical significance. Recommendations: /* use 100 data sets for a trial run, but use 1000 or more for final results */ permutes = 10; /* End of required user specifications */ idv = data[,1]; mod1 = data[,2]; mod2 = data[,3]; dv = data[,4]; show datasets; show contents; print, 'Moderated Regression For A 3-way Interaction Between Continuous Variables:'; /* row sums module */ start rsum(matname); rsums =j(nrow(matname),1); do rows = 1 to nrow(matname); dumr = matname[rows,]; rsums[rows,1]=sum(dumr); end; return(rsums); finish; /* collumn sums module */ start csum(matname); csums =j(1,ncol(matname)); do cols = 1 to ncol(matname); dumc = matname[,cols]; csums[1,cols]=sum(dumc); end; return(csums); finish; /* column means module */ start cmean(matname); moyennes = matname[+,] / nrow(matname); return(moyennes); finish; /* column standard deviations module -- IML manual p 112 */ start cstd(matname); N = nrow(matname); matname = matname - repeat(cmean(matname),N,1); ss = matname[##,]; csd = sqrt(ss/(N-1)); return(csd); finish; /* correlation matrix module -- IML manual p 112 */ start corr(matname); n = nrow(matname); sum = matname[+,]; xpx = t(matname) * matname - t(sum) * sum / n; s = diag(1 / sqrt(vecdiag(xpx))); corr = s * xpx * s; return(corr); finish; /* sample size */ bigN = nrow(idv); /* centering the IDV & MOD1 & MOD2 */ idv = idv - cmean(idv); mod1 = mod1 - cmean(mod1); mod2 = mod2 - cmean(mod2); /* 3-D min, max, increm */ IDVmax = max(idv) + .00000001; IDVmin = min(idv) - .00000001; MOD1max = max(mod1) + .00000001; MOD1min = min(mod1) - .00000001; MOD2max = max(mod2) + .00000001; MOD2min = min(mod2) - .00000001; increm1 = (IDVmax - IDVmin) / chunkIDV; increm2 = (MOD1max - MOD1min) / chunkMOD1; increm3 = (MOD2max - MOD2min) / chunkMOD2; /* Moderated Regression */ x1 = idv # mod1; x2 = idv # mod2; x3 = mod1 # mod2; x4 = idv # mod1 # mod2; cr = corr( idv || mod1 || mod2 || x1 || x2 || x3 || x4 || dv ); beta = inv(cr[1:7,1:7]) * cr[1:7,8]; sd = cstd( idv || mod1 || mod2 || x1 || x2 || x3 || x4 || dv ); mn = cmean( idv || mod1 || mod2 || x1 || x2 || x3 || x4 || dv ); b = (sd[1,8] / sd[1,1:7]) # t(beta); a = mn[1,8] - ( sum( mn[1,1:7] # b ) ); r2all = t(beta) * cr[1:7,8]; r2main = t( inv(cr[1:6,1:6])*cr[1:6,8]) * cr[1:6,8]; r2chXn = r2all - r2main; F = (r2all-r2main) / ((1-r2all)/(bigN-7-1)); dferror = bigN - 7 - 1; pF = 1 - probf(F,1,dferror); /* Moderated Regression including quadratic terms */ idvq = idv # idv; mod1q = mod1 # mod1; mod2q = mod2 # mod2; crq = corr( idv || mod1 || mod2 || idvq || mod1q|| mod2q || x1 || x2 || x3 || x4 || dv ); betaq = inv(crq[1:10,1:10]) * crq[1:10,11]; mnq = cmean( idv || mod1 || mod2 || idvq || mod1q|| mod2q || x1 || x2 || x3 || x4 || dv ); sdq = cstd( idv || mod1 || mod2 || idvq || mod1q|| mod2q || x1 || x2 || x3 || x4 || dv ); bq = (sdq[1,11] / sdq[1,1:10]) # t(betaq); aq = mnq[1,11] - ( sum( mnq[1,1:10] # bq ) ); r2allq = t(betaq) * crq[1:10,11]; r2mainq = t( inv(crq[1:9,1:9])*crq[1:9,11] ) * crq[1:9,11]; fsquareq = (r2allq - r2mainq) / (1 - r2allq); r2chXnq = r2allq - r2mainq; Fq = (r2allq-r2mainq) / ((1-r2allq)/(bigN-10-1)); dferrorq = bigN - 10 - 1; pFq = 1 - probf(Fq,1,dferrorq); /* f-squared = the proportion of systematic variance accounted for by the effect /* relative to unexplained variance in the criterion (A & W, 1991, p. 157) */ fsquare = (r2all - r2main) / (1 - r2all); fsquareq = (r2allq - r2mainq) / (1 - r2allq); /* mse Darlington p 121 */ xx = ( j(bigN,1,1) || idv || mod1 || mod2 || x1 || x2 || x3 || x4 ); sse = t(dv) * dv - t( a // t(b)) * t(xx) * dv; mse = sse / ( bigN - 7 - 1); /* Vxzxz = the residual variance of the product XZ after controlling for X & Z (p. 378); /* 2 ways of computing Vxzxz /* method 1 = the MSE that results from regressing XZ on X & Z (p. 381) /* method 2 = using the full mod reg equation, = /* MSE / (n * stand. error of the estimate for the product##2) (p. 381) */ /* method 2: the full mod reg equation approach (yields slightly diff results in unusual sits) /* Cohen & Cohen, 2003, p. 632 */ Rijm1 = inv(cr[1:7,1:7]); SExn = ( sd[1,8] / sd[1,7] ) * sqrt( (1 - r2all) / ( bigN - 7 - 1) ) * sqrt( Rijm1[7,7] ); Vxzxz2 = mse / ( ( bigN - 7 - 1) * SExn##2 ); /* method 1: using the MSE that results from regressing XZ on X & Z approach */ cr2 = corr( idv || mod1 || mod2 || x1 || x2 || x3 || x4 ); beta2 = inv(cr2[1:6,1:6]) * cr2[1:6,7]; sd2 = cstd( idv || mod1 || mod2 || x1 || x2 || x3 || x4 ); mn2 = cmean( idv || mod1 || mod2 || x1 || x2 || x3 || x4 ); b2 = (sd2[1,7] / sd2[1,1:6]) # t(beta2); a2 = mn2[1,7] - ( sum( mn2[1,1:6] # b2 ) ); xx2 = ( j(bigN,1,1) || idv || mod1 || mod2 || x1 || x2 || x3 ); sse2 = t(x4) * x4 - t( a2 // t(b2)) * t(xx2) * x4; /* mse Darlington p 121 */ mse2 = sse2 / ( bigN - 6 -1); Vxzxz1 = mse2; /* PRE, & Relative efficiency / optimal design values */ if optdes = 1 then do; /* Using the mean scores in the top and bottoms chunks, based on the specified chunk sizes */ idvtop = -9999; idvbot = -9999; mod1top = -9999; mod1bot = -9999; mod2top = -9999; mod2bot = -9999; do luper = 1 to nrow(idv); if idv[luper,1] > (IDVmax - increm1) then idvtop = ( idvtop // idv[luper,1] ); if idv[luper,1] < (IDVmin + increm1) then idvbot = ( idvbot // idv[luper,1] ); if mod1[luper,1] > (MOD1max - increm2) then mod1top = ( mod1top // mod1[luper,1] ); if mod1[luper,1] < (MOD1min + increm2) then mod1bot = ( mod1bot // mod1[luper,1] ); if mod2[luper,1] > (MOD2max - increm3) then mod2top = ( mod2top // mod2[luper,1] ); if mod2[luper,1] < (MOD2min + increm3) then mod2bot = ( mod2bot // mod2[luper,1] ); end; highIDV = cmean(idvtop[2:nrow(idvtop),1]); lowIDV = cmean(idvbot[2:nrow(idvbot),1]); highMOD1 = cmean(mod1top[2:nrow(mod1top),1]); lowMOD1 = cmean(mod1bot[2:nrow(mod1bot),1]); highMOD2 = cmean(mod2top[2:nrow(mod2top),1]); lowMOD2 = cmean(mod2bot[2:nrow(mod2bot),1]); end; /* maximim value of Vxzxz (p. 381, 383) */ maxVxzxz = (( (highIDV - lowIDV) / 2 ) ##2 ) * (( (highMOD1 - lowMOD1) / 2 ) ##2 ) * (( (highMOD2 - lowMOD2) / 2 ) ##2 ); /* Relative Efficiency */ releffic = Vxzxz1 / maxVxzxz; /* PRE squared partial correlation = proportional reduction in error (p 377, 384) */ /* = the model improvement due to adding the product term */ PRE = 1 / ( 1+ ( mse / (b[1,7]##2 * Vxzxz1) ) ); /* PRE for an optimal design p 384 */ PRE2 = 1 / ( 1+ ( mse / (b[1,7]##2 * maxVxzxz) ) ); print "Sample Size " bigN; print "IDV Mean: " (mn[1,1]) [format=9.3]; print "IDV Standard Deviation: " (sd[1,1]) [format=9.3]; print "IDV Lowest Score: " IDVmin [format=9.3]; print "IDV Highest Score: " IDVmax [format=9.3]; print "MOD1 Mean: " (mn[1,2]) [format=9.3]; print "MOD1 Standard Deviation: " (sd[1,2]) [format=9.3]; print "MOD1 Lowest Score: " MOD1min [format=9.3]; print "MOD1 Highest Score: " MOD1max [format=9.3]; print "MOD2 Mean: " (mn[1,3]) [format=9.3]; print "MOD2 Standard Deviation: " (sd[1,3]) [format=9.3]; print "MOD2 Lowest Score: " MOD2min [format=9.3]; print "MOD2 Highest Score: " MOD2max [format=9.3]; print "Specified # of chunks for IDV: " chunkIDV; print "Specified # of chunks for MOD1: " chunkMOD1; print "Specified # of chunks for MOD2: " chunkMOD2; print "IDV Low Value for an Optimal Design: " lowIDV [format=9.3]; print "IDV High Value for an Optimal Design: " highIDV [format=9.3]; print "MOD1 Low Value for an Optimal Design: " lowMOD1 [format=9.3]; print "MOD1 High Value for an Optimal Design: " highMOD1 [format=9.3]; print "MOD2 Low Value for an Optimal Design: " lowMOD2 [format=9.3]; print "MOD2 High Value for an Optimal Design: " highMOD2 [format=9.3]; print "Vxzxz1 (residual variance of the product): " Vxzxz1 [format=9.3]; print "Vxzxz2 (residual variance of the product): " Vxzxz2 [format=9.3]; print "maxVxzxz (maximum possible value of Vxzxz): " maxVxzxz [format=9.3]; print "Relative Efficiency (Vxzxz / maxVxzxz): " releffic [format=9.3]; print "PRE (Proportional Reduction in Error): " PRE [format=9.3]; print "PRE for the optimal design: " PRE2 [format=9.3]; print,,,, "Regression results for the equation that DOES NOT include quadratic terms:"; print "Slope coefficient for IDV: " (b[1,1]) [format=9.3]; print "Slope coefficient for MOD1: " (b[1,2]) [format=9.3]; print "Slope coefficient for MOD2: " (b[1,3]) [format=9.3]; print "Slope coefficient for IDV * MOD1: " (b[1,4]) [format=9.3]; print "Slope coefficient for IDV * MOD2: " (b[1,5]) [format=9.3]; print "Slope coefficient for MOD1 * MOD2: " (b[1,6]) [format=9.3]; print "Slope coefficient for IDV * MOD1 * MOD2: " (b[1,7]) [format=9.3]; print "Intercept: " a [format=9.3]; print "R-squared change for the interaction term; " r2chXn [format=9.3]; print "f-squared effect size for the interaction term: " fsquare [format=9.3]; print "F value for the interaction term: " F [format=9.3]; print "Significance level of F (tabled value): " pF [format=9.6]; print "Specified number of randomized data sets: " permutes; /* Analyses of the randomized data */ counter = 0; counterq = 0; do nperms = 1 to permutes; /* The raw data permutations are based on column-wise random shufflings /* of the values in the raw data matrix using Castellan's (1992, /* BRMIC, 24, 72-77) algorithm; The distributions of the original /* raw variables are exactly preserved in the shuffled versions used /* in the parallel analyses. */ /* data matrix to permute */ permdata = ( idv || mod1 || mod2 || dv ); do luper = 1 to (bigN -1); k1 = int( (bigN - luper + 1) * ranuni(1) + 1 ) + luper - 1; k2 = int( (bigN - luper + 1) * ranuni(1) + 1 ) + luper - 1; k3 = int( (bigN - luper + 1) * ranuni(1) + 1 ) + luper - 1; k4 = int( (bigN - luper + 1) * ranuni(1) + 1 ) + luper - 1; d1 = permdata[luper,1]; d2 = permdata[luper,2]; d3 = permdata[luper,3]; d4 = permdata[luper,4]; permdata[luper,1] = permdata[k1,1]; permdata[luper,2] = permdata[k2,2]; permdata[luper,3] = permdata[k3,3]; permdata[luper,4] = permdata[k4,4]; permdata[k1,1] = d1; permdata[k2,2] = d2; permdata[k3,3] = d3; permdata[k3,4] = d4; end; idv12 = permdata[,1]; mod12 = permdata[,2]; mod22 = permdata[,3]; dv12 = permdata[,4]; /* Moderated Regression */ x12 = idv12 # mod12; x22 = idv12 # mod22; x32 = mod12 # mod22; x42 = idv12 # mod12 # mod22; cr2 = corr( idv12 || mod12 || mod22 || x12 || x22 || x32 || x42 || dv12 ); beta2 = inv(cr2[1:7,1:7]) * cr2[1:7,8]; r2all2 = t(beta2) * cr2[1:7,8]; r2main2 = t( inv(cr2[1:6,1:6])*cr2[1:6,8]) * cr2[1:6,8]; r2chXn2 = r2all2 - r2main2; fsquare2 = (r2all2 - r2main2) / (1 - r2all2); F2 = (r2all2-r2main2) / ((1-r2all2)/(bigN-7-1)); if F2 >= F then counter = counter + 1; /* Moderated Regression including quadratic terms */ idvq2 = idv12 # idv12; mod1q2 = mod12 # mod12; mod2q2 = mod22 # mod22; crq2 = corr( idv12 || mod12 || mod22 || idvq2 || mod1q2 || mod2q2 || x12 || x22 || x32 || x42 || dv12 ); betaq2 = inv(crq2[1:10,1:10]) * crq2[1:10,11]; r2allq2 = t(betaq2) * crq2[1:10,11]; r2mainq2 = t( inv(crq2[1:9,1:9])*crq2[1:9,11] ) * crq2[1:9,11]; Fq2 = (r2allq2-r2mainq2) / ((1-r2allq2)/(bigN-10-1)); if Fq2 >= Fq then counterq = counterq + 1; end; /* significance level computation from Noreen (1989, p. 56) */ siglevel = (counter + 1) / (permutes + 1); siglevq = (counterq + 1) / (permutes + 1); print "Significance level of F (randomization test): " siglevel [format=9.6]; print,,,, "Regression results for the equation that INCLUDES quadratic terms:"; print "Slope coefficient for IDV: " (bq[1,1]) [format=9.3]; print "Slope coefficient for MOD1: " (bq[1,2]) [format=9.3]; print "Slope coefficient for MOD2: " (bq[1,3]) [format=9.3]; print "Slope coefficient for the IDV quadratic term: " (bq[1,4]) [format=9.3]; print "Slope coefficient for the MOD1 quadratic term: " (bq[1,5]) [format=9.3]; print "Slope coefficient for the MOD2 quadratic term: " (bq[1,6]) [format=9.3]; print "Slope coefficient for IDV * MOD1: " (bq[1,7]) [format=9.3]; print "Slope coefficient for IDV * MOD2: " (bq[1,8]) [format=9.3]; print "Slope coefficient for MOD1 * MOD2: " (bq[1,9]) [format=9.3]; print "Slope coefficient for IDV * MOD1 * MOD2: " (bq[1,10]) [format=9.3]; print "Intercept: " aq [format=9.3]; print "R-squared change for the interaction term; " r2chXnq [format=9.3]; print "f-squared effect size for the interaction term: " fsquareq [format=9.3]; print "F value for the interaction term: " Fq [format=9.3]; print "Significance level of F (tabled value): " pFq [format=9.6]; print "Specified number of randomized data sets: " permutes; print "Significance level of F (randomization test): " siglevq [format=9.6]; quit;