Monday, October 20, 2025

Utilizing R to duplicate widespread SPSS a number of regression output


The next put up replicates a number of the commonplace output you would possibly get from a a number of regression evaluation in SPSS. A replica of the code in RMarkdown format is offered on github. The put up was motivated by this earlier put up that mentioned utilizing R to show psychology college students statistics.

library(overseas)  # learn.spss
library(psych)  # describe 
library(Hmisc)  # rcorr 
library(QuantPsyc)  # lm.beta
library(automotive)  # vif, durbinWatsonTest
library(MASS)  # studres
library(lmSupport)  #lm.sumSquares
library(perturb)  # colldiag

With a view to emulate SPSS output, it’s essential to put in a number of add-on packages. The above library instructions load the packages into your R workspace. I’ve highlighted within the remark the names of the capabilities which can be used on this script.
Chances are you’ll not have the above packages put in.
If not, run instructions like:

  • set up.packages('overseas')
  • set up.packages('psych')
  • and so on.

for every of the above packages not put in or use the “packages” tab in RStudio to put in.
Observe additionally that a lot of this evaluation might be carried out utilizing
Rcommander utilizing a extra SPSS-style GUI atmosphere.

cars_raw <- learn.spss("automobiles.sav", to.information.body = TRUE)
# eliminate lacking information listwise
automobiles <- na.omit(cars_raw[, c("accel", "mpg", "engine", "horse", "weight")])

Make sure that automobiles.sav is the working listing.

# be aware the necessity to take care of lacking information
psych::describe(cars_raw)
##             var   n    imply     sd  median trimmed    mad    min     max
## mpg           1 398   23.51   7.82   23.00   23.06   8.90   9.00   46.60
## engine        2 406  194.04 105.21  148.50  183.75  86.73   4.00  455.00
## horse         3 400  104.83  38.52   95.00  100.36  29.65  46.00  230.00
## weight        4 406 2969.56 849.83 2811.00 2913.97 947.38 732.00 5140.00
## accel         5 406   15.50   2.82   15.50   15.45   2.59   8.00   24.80
## yr*         6 405    6.94   3.74    7.00    6.93   4.45   1.00   13.00
## origin*       7 405    1.57   0.80    1.00    1.46   0.00   1.00    3.00
## cylinder*     8 405    3.20   1.33    2.00    3.14   0.00   1.00    5.00
## filter_.*     9 398    1.73   0.44    2.00    1.79   0.00   1.00    2.00
## weightKG     10 406 1346.97 385.48 1275.05 1321.75 429.72 332.03 2331.46
## engineLitre  11 406    3.19   1.73    2.44    3.02   1.42   0.07    7.47
##               vary  skew kurtosis    se
## mpg           37.60  0.45    -0.53  0.39
## engine       451.00  0.69    -0.81  5.22
## horse        184.00  1.04     0.55  1.93
## weight      4408.00  0.46    -0.77 42.18
## accel         16.80  0.21     0.35  0.14
## yr*         12.00  0.02    -1.21  0.19
## origin*        2.00  0.92    -0.81  0.04
## cylinder*      4.00  0.27    -1.69  0.07
## filter_.*      1.00 -1.04    -0.92  0.02
## weightKG    1999.43  0.46    -0.77 19.13
## engineLitre    7.41  0.69    -0.81  0.09

dim(automobiles)
## [1] 392   5
head(automobiles)
##   accel mpg engine horse weight
## 1  12.0  18    307   130   3504
## 2  11.5  15    350   165   3693
## 3  11.0  18    318   150   3436
## 4  12.0  16    304   150   3433
## 5  10.5  17    302   140   3449
## 6  10.0  15    429   198   4341
str(automobiles)
## 'information.body':    392 obs. of  5 variables:
##  $ accel : num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ mpg   : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ engine: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horse : num  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight: num  3504 3693 3436 3433 3449 ...
##  - attr(*, "na.motion")=Class 'omit'  Named int [1:14] 11 12 13 14 15 18 39 40 134 338 ...
##   .. ..- attr(*, "names")= chr [1:14] "11" "12" "13" "14" ...
match <- lm(accel ~ mpg + engine + horse + weight, information = automobiles)

Descriptive Statistics

# Descriptive statistics
psych::describe(automobiles)
##        var   n    imply     sd  median trimmed    mad min    max  vary
## accel    1 392   15.52   2.78   15.50   15.46   2.52   8   24.8   16.8
## mpg      2 392   23.45   7.81   22.75   22.99   8.60   9   46.6   37.6
## engine   3 392  193.65 104.94  148.50  183.15  86.73   4  455.0  451.0
## horse    4 392  104.21  38.23   93.00   99.61  28.17  46  230.0  184.0
## weight   5 392 2967.38 852.29 2797.50 2909.64 945.90 732 5140.0 4408.0
##        skew kurtosis    se
## accel  0.27     0.43  0.14
## mpg    0.45    -0.54  0.39
## engine 0.69    -0.77  5.30
## horse  1.09     0.71  1.93
## weight 0.48    -0.76 43.05

# correlations
cor(automobiles)
##          accel     mpg  engine   horse  weight
## accel   1.0000  0.4375 -0.5298 -0.6936 -0.4013
## mpg     0.4375  1.0000 -0.7893 -0.7713 -0.8072
## engine -0.5298 -0.7893  1.0000  0.8959  0.9339
## horse  -0.6936 -0.7713  0.8959  1.0000  0.8572
## weight -0.4013 -0.8072  0.9339  0.8572  1.0000
rcorr(as.matrix(automobiles))  # embody sig check for all correlations
##        accel   mpg engine horse weight
## accel   1.00  0.44  -0.53 -0.69  -0.40
## mpg     0.44  1.00  -0.79 -0.77  -0.81
## engine -0.53 -0.79   1.00  0.90   0.93
## horse  -0.69 -0.77   0.90  1.00   0.86
## weight -0.40 -0.81   0.93  0.86   1.00
## 
## n= 392 
## 
## 
## P
##        accel mpg engine horse weight
## accel         0   0      0     0    
## mpg     0         0      0     0    
## engine  0     0          0     0    
## horse   0     0   0            0    
## weight  0     0   0      0
# scatterplot matrix if you need
pairs.panels(automobiles)

Abstract of mannequin

# r-square, adjusted r-square, std. error of estimate, general ANOVA, df, p,
# unstandardised coefficients, sig exams
abstract(match)
## 
## Name:
## lm(method = accel ~ mpg + engine + horse + weight, information = automobiles)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.177 -1.023 -0.184  0.936  6.873 
## 
## Coefficients:
##              Estimate Std. Error t worth Pr(>|t|)    
## (Intercept) 16.980778   0.977425   17.37   <2e-16 ***
## mpg          0.007476   0.019298    0.39   0.6987    
## engine      -0.008230   0.002674   -3.08   0.0022 ** 
## horse       -0.087169   0.005204  -16.75   <2e-16 ***
## weight       0.003046   0.000297   10.24   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual commonplace error: 1.7 on 387 levels of freedom
## A number of R-squared:  0.631,  Adjusted R-squared:  0.627 
## F-statistic:  166 on 4 and 387 DF,  p-value: <2e-16
### more information when it comes to sums of squares
anova(match)
## Evaluation of Variance Desk
## 
## Response: accel
##            Df Sum Sq Imply Sq F worth Pr(>F)    
## mpg         1    577     577   200.8 <2e-16 ***
## engine      1    272     272    94.7 <2e-16 ***
## horse       1    753     753   261.8 <2e-16 ***
## weight      1    302     302   104.9 <2e-16 ***
## Residuals 387   1113       3                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# 95% confidence intervals (defaults to 95%)
confint(match)
##                 2.5 %    97.5 %
## (Intercept) 15.059049 18.902506
## mpg         -0.030466  0.045418
## engine      -0.013488 -0.002972
## horse       -0.097401 -0.076938
## weight       0.002461  0.003630
# however can specify completely different confidence intervals
confint(match, degree = 0.99)
##                 0.5 %    99.5 %
## (Intercept) 14.450621 19.510934
## mpg         -0.042478  0.057430
## engine      -0.015153 -0.001308
## horse       -0.100641 -0.073698
## weight       0.002276  0.003816

# standardised coefficients
lm.beta(match)
##      mpg   engine    horse   weight 
##  0.02101 -0.31093 -1.19988  0.93456

# or you might do it manually
zcars <- information.body(scale(automobiles))  # make all variables z-scores
zfit <- lm(accel ~ mpg + engine + horse + weight, information = zcars)
coef(zfit)[-1]
##      mpg   engine    horse   weight 
##  0.02101 -0.31093 -1.19988  0.93456

# correlations: zero-order, semi-partial, partial obscure perform appears to
# do it
sqrt(lm.sumSquares(match)[, c(2, 3)])
##              dR-sqr pEta-sqr
## (Intercept) 0.53638   0.6620
## mpg         0.01000   0.0200
## engine      0.09487   0.1546
## horse       0.51711   0.6483
## weight      0.31623   0.4617
## Error (SSE)      NA       NA
## Complete (SST)      NA       NA

# or use personal perform
cor_lm <- perform(match) {
    dv <- names(match$mannequin)[1]
    dv_data <- match$mannequin[, dv]
    ivs <- names(match$mannequin)[-1]
    iv_data <- match$mannequin[, ivs]
    x <- match$mannequin
    x_omit <- lapply(ivs, perform(X) x[, c(dv, setdiff(ivs, X))])
    names(x_omit) <- ivs
    lapply(x_omit, head)
    fits_omit <- lapply(x_omit, perform(X) lm(as.method(paste(dv, "~ .")), 
        information = X))
    resid_omit <- sapply(fits_omit, resid)
    iv_omit <- lapply(ivs, perform(X) lm(as.method(paste(X, "~ .")), information = iv_data))
    resid_iv_omit <- sapply(iv_omit, resid)

    outcomes <- sapply(seq(ivs), perform(i) c(zeroorder = cor(iv_data[, i], 
        dv_data), partial = cor(resid_iv_omit[, i], resid_omit[, i]), semipartial = cor(resid_iv_omit[, 
        i], dv_data)))
    outcomes <- information.body(outcomes)

    names(outcomes) <- ivs
    outcomes <- information.body(t(outcomes))
    outcomes
}

spherical(cor_lm(match), 3)
##        zeroorder partial semipartial
## mpg        0.438   0.020       0.012
## engine    -0.530  -0.155      -0.095
## horse     -0.694  -0.648      -0.517
## weight    -0.401   0.462       0.316

Assumption testing

# Durbin Watson check
durbinWatsonTest(match)
##  lag Autocorrelation D-W Statistic p-value
##    1           0.136         1.721   0.004
##  Various speculation: rho != 0

# vif
vif(match)
##    mpg engine  horse weight 
##  3.085 10.709  5.383  8.736

# tolerance
1/vif(match)
##     mpg  engine   horse  weight 
## 0.32415 0.09338 0.18576 0.11447

# collinearity diagnostics
colldiag(match)
## Situation
## Index    Variance Decomposition Proportions
##           intercept mpg   engine horse weight
## 1   1.000 0.000     0.001 0.001  0.001 0.000 
## 2   3.623 0.002     0.051 0.016  0.005 0.001 
## 3  16.214 0.006     0.066 0.365  0.763 0.019 
## 4  18.519 0.127     0.431 0.243  0.152 0.227 
## 5  32.892 0.865     0.451 0.375  0.079 0.753

# residual statistics
rfit <- information.body(predicted = predict(match), residuals = resid(match), studentised_residuals = studres(match))
psych::describe(rfit)
##                       var   n  imply   sd median trimmed  mad   min   max
## predicted               1 392 15.52 2.21  16.11   15.80 1.40  3.13 20.06
## residuals               2 392  0.00 1.69  -0.18   -0.11 1.39 -4.18  6.87
## studentised_residuals   3 392  0.00 1.01  -0.11   -0.07 0.82 -2.49  4.47
##                       vary  skew kurtosis   se
## predicted             16.93 -1.61     4.10 0.11
## residuals             11.05  0.75     1.10 0.09
## studentised_residuals  6.95  0.81     1.38 0.05

# distribution of standarised residuals
zresid <- scale(resid(match))
hist(zresid)
# or add regular curve http://www.statmethods.web/graphs/density.html
hist_with_normal_curve <- perform(x, breaks = 24) {
    h <- hist(zresid, breaks = breaks, col = "lightblue")
    xfit <- seq(min(x), max(x), size = 40)
    yfit <- dnorm(xfit, imply = imply(x), sd = sd(x))
    yfit <- yfit * diff(h$mids[1:2]) * size(x)
    traces(xfit, yfit, lwd = 2)
}
hist_with_normal_curve(zresid)

# normality of residuals
qqnorm(zresid)
abline(a = 0, b = 1)

# plot predicted by residual
plot(predict(match), resid(match))

# plot dependent by residual
plot(automobiles$accel, resid(match))

Related Articles

Latest Articles