Ejemplo Anova

Lectura de datos

library(skimr)
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(texreg)
## Version:  1.39.4
## Date:     2024-07-23
## Author:   Philip Leifeld (University of Manchester)
## 
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rempsyc)
## Suggested APA citation: Thériault, R. (2023). rempsyc: Convenience functions for psychology. 
## Journal of Open Source Software, 8(87), 5466. https://doi.org/10.21105/joss.05466
library(flextable)
library(rio)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(lsr)
#install.packages("flextable")
#install.packages("multcompView")
#install.packages("rempsyc")

#getwd()
anova1=import("./anova_I.xlsx", sep=";")
names(anova1)
## [1] "ID"        "bienestar" "Grupo"
anova2=read.csv("./anova_sueno.csv", sep=";")
names(anova2)
## [1] "id"    "vd"    "grupo"

Describir base de datos

anova1 %>%
  skim(bienestar)
Data summary
Name Piped data
Number of rows 16
Number of columns 3
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bienestar 0 1 5.44 2.85 2 3 4.5 8 10 ▇▃▁▃▃
anova1 %>%
  group_by(Grupo) %>%
  skim(bienestar)
Data summary
Name Piped data
Number of rows 16
Number of columns 3
_______________________
Column type frequency:
numeric 1
________________________
Group variables Grupo

Variable type: numeric

skim_variable Grupo n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bienestar control 0 1 3.00 0.82 2 2.75 3.0 3.25 4 ▃▁▇▁▃
bienestar gato 0 1 5.25 1.71 3 4.50 5.5 6.25 7 ▇▁▇▇▇
bienestar otro 0 1 4.25 2.63 2 2.75 3.5 5.00 8 ▇▃▁▁▃
bienestar perro 0 1 9.25 0.96 8 8.75 9.5 10.00 10 ▃▁▃▁▇
ggplot(anova1, aes(y=bienestar, x=Grupo)) +
  geom_boxplot()

Estimación del modelo

# Modelo anova
mod1= aov(bienestar ~ Grupo, data=anova1)
summary(mod1)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Grupo        3  87.69  29.229   10.24 0.00125 **
## Residuals   12  34.25   2.854                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sstable <- Anova(mod1, type = 2)

nice_table(sstable)

Sum Sq

Df

F value

Pr(>F)

87.69

3.00

10.24

0.00

34.25

12.00

knitr::kable(sstable, digits = 3)
Sum Sq Df F value Pr(>F)
Grupo 87.688 3 10.241 0.001
Residuals 34.250 12 NA NA
# F_crítico 
qf(0.05, 4-1, 16-4, lower.tail = F)
## [1] 3.490295

Comparar F observado(10.24) vs F crítico(3.49)

Describir base de datos

anova2 %>%
  skim(vd)
Data summary
Name Piped data
Number of rows 60
Number of columns 3
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
vd 0 1 4.37 2.13 1 3 4 6 10 ▅▇▆▂▁
anova2 %>%
  group_by(grupo) %>%
  skim(vd)
Data summary
Name Piped data
Number of rows 60
Number of columns 3
_______________________
Column type frequency:
numeric 1
________________________
Group variables grupo

Variable type: numeric

skim_variable grupo n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
vd C 0 1 3.00 1.51 1 2.0 3 3.5 7 ▇▅▂▁▁
vd EN 0 1 3.27 1.44 1 2.5 3 4.0 6 ▆▇▅▃▂
vd EP 0 1 4.73 1.49 2 4.0 5 6.0 7 ▆▇▆▆▃
vd IS 0 1 6.47 2.10 3 5.0 6 8.0 10 ▃▅▇▃▅
ggplot(anova2, aes(y=vd, x=grupo)) +
  geom_boxplot()

Estimación del modelo

# Modelo anova
mod1= aov(vd ~ grupo, data=anova2)
summary(mod1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## grupo        3  114.3   38.11    13.9 6.93e-07 ***
## Residuals   56  153.6    2.74                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sstable <- Anova(mod1, type = 2)

nice_table(sstable)

Sum Sq

Df

F value

Pr(>F)

114.33

3.00

13.89

0.00

153.60

56.00

knitr::kable(sstable, digits = 3)
Sum Sq Df F value Pr(>F)
grupo 114.333 3 13.895 0
Residuals 153.600 56 NA NA
# F_crítico 
qf(0.05, 3-1, 60-4, lower.tail = F)
## [1] 3.161861

Comparar F observado(13.89) vs F crítico(3.16)

Chequeo de Supuestos

Normalidad

plot(mod1)

#histogram(mod1$residuals)

ggplot(anova2) +
  geom_density(aes(x=vd))+
  geom_density(aes(vd, group=grupo, color=grupo))

shapiro.test(mod1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  mod1$residuals
## W = 0.98784, p-value = 0.814
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "C"))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(mod1$residuals, subset = anova2$grupo == "C")
## W = 0.8526, p-value = 0.01891
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "EN"))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(mod1$residuals, subset = anova2$grupo == "EN")
## W = 0.95073, p-value = 0.536
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "EP"))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(mod1$residuals, subset = anova2$grupo == "EP")
## W = 0.951, p-value = 0.5403
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "IS"))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(mod1$residuals, subset = anova2$grupo == "IS")
## W = 0.95742, p-value = 0.6476

Homocedasticidad

boxplot(vd~ grupo, data=anova2, col = c("yellow", "blue", "white","green"), ylab = "actitud hacia el lavado de manos")

leveneTest(vd ~ grupo, data=anova2)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.1061 0.3544
##       56
bartlett.test(vd ~ grupo, data = anova2 )
## 
##  Bartlett test of homogeneity of variances
## 
## data:  vd by grupo
## Bartlett's K-squared = 2.8121, df = 3, p-value = 0.4215
oneway.test(vd ~ grupo, data=anova2, var.equal = TRUE)
## 
##  One-way analysis of means
## 
## data:  vd and grupo
## F = 13.895, num df = 3, denom df = 56, p-value = 6.931e-07

Contrastes post-hoc

Si hemos detectado diferencias significativas entre los tipos de sensibilizaciones. ¿Sería posible saber cuáles son los grupos que generan estas diferencias?

pairwise.t.test( anova2$vd, 
                 anova2$grupo, 
                 p.adj = "bonferroni")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  anova2$vd and anova2$grupo 
## 
##    C       EN      EP   
## EN 1.000   -       -    
## EP 0.035   0.111   -    
## IS 2.5e-06 1.3e-05 0.035
## 
## P value adjustment method: bonferroni
intervalos= TukeyHSD(mod1)
intervalos
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = vd ~ grupo, data = anova2)
## 
## $grupo
##            diff        lwr      upr     p adj
## EN-C  0.2666667 -1.3346251 1.867958 0.9710793
## EP-C  1.7333333  0.1320416 3.334625 0.0289845
## IS-C  3.4666667  1.8653749 5.067958 0.0000024
## EP-EN 1.4666667 -0.1346251 3.067958 0.0839097
## IS-EN 3.2000000  1.5987083 4.801292 0.0000122
## IS-EP 1.7333333  0.1320416 3.334625 0.0289845
plot(intervalos)

library(multcompView)
multcompBoxplot(vd~ grupo, data=anova2, compFn = "TukeyHSD")

Contrastes planificados

table(anova2$grupo)

summary.lm(mod1)


means= emmeans(mod1, specs =~grupo)
Contraste1 = list(control_vs_todos  = c(-3,1,1,1))

contrast(means, Contraste1)

Contraste2 = list(EN_vs_EPIS  = c(0,-2,1,1))

contrast(means, Contraste2)

Contraste3 = list(EN_vs_EPIS  = c(0,0,-1,1))

contrast(means, Contraste3)
contrast1 <- c(-3, 1, 1, 1)
contrast2 <- c(0,-2, 1, 1)
contrast3 <- c(0, 0, -1, 1)


str(anova2)
## 'data.frame':    60 obs. of  3 variables:
##  $ id   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ vd   : int  4 5 7 4 3 5 6 2 6 5 ...
##  $ grupo: chr  "EP" "EP" "EP" "EP" ...
View(anova2)
anova2$grupof= as.factor(anova2$grupo)
contrasts(anova2$grupof) <- cbind(contrast1, contrast2, contrast3)
anova2$grupof
##  [1] EP EP EP EP EP EP EP EP EP EP EP EP EP EP EP EN EN EN EN EN EN EN EN EN EN
## [26] EN EN EN EN EN IS IS IS IS IS IS IS IS IS IS IS IS IS IS IS C  C  C  C  C 
## [51] C  C  C  C  C  C  C  C  C  C 
## attr(,"contrasts")
##    contrast1 contrast2 contrast3
## C         -3         0         0
## EN         1        -2         0
## EP         1         1        -1
## IS         1         1         1
## Levels: C EN EP IS
planificado <- aov(vd ~ grupof, data=anova2)
summary.lm(planificado)
## 
## Call:
## aov(formula = vd ~ grupof, data = anova2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4667 -1.0000 -0.2667  1.0667  4.0000 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.3667     0.2138  20.423  < 2e-16 ***
## grupofcontrast1   0.4556     0.1234   3.690 0.000509 ***
## grupofcontrast2   0.7778     0.1746   4.455 4.06e-05 ***
## grupofcontrast3   0.8667     0.3024   2.866 0.005842 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.656 on 56 degrees of freedom
## Multiple R-squared:  0.4267, Adjusted R-squared:  0.396 
## F-statistic: 13.89 on 3 and 56 DF,  p-value: 6.931e-07

Otro ejemplo

anova3=read.csv("./autoef.csv", sep=";")
names(anova2)
## [1] "id"     "vd"     "grupo"  "grupof"
anova3 %>%
  skim(bienestar)
Data summary
Name Piped data
Number of rows 20
Number of columns 3
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bienestar 0 1 15.05 6.57 6 9.75 14.5 19.25 27 ▇▃▃▃▅
anova3 %>%
  group_by(Grupo) %>%
  skim(bienestar)
Data summary
Name Piped data
Number of rows 20
Number of columns 3
_______________________
Column type frequency:
numeric 1
________________________
Group variables Grupo

Variable type: numeric

skim_variable Grupo n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bienestar 1problema 0 1 19.00 0.82 18 18.75 19.0 19.25 20 ▃▁▇▁▃
bienestar 2problema 0 1 14.50 1.29 13 13.75 14.5 15.25 16 ▇▇▁▇▇
bienestar 3problema 0 1 10.00 0.82 9 9.75 10.0 10.25 11 ▃▁▇▁▃
bienestar 4problema 0 1 7.00 0.82 6 6.75 7.0 7.25 8 ▃▁▇▁▃
bienestar control 0 1 24.75 1.71 23 23.75 24.5 25.50 27 ▇▇▇▁▇
ggplot(anova3, aes(y=bienestar, x=Grupo)) +
  geom_boxplot()

mod2= aov(bienestar ~ Grupo, data=anova3)
summary(mod2)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Grupo        4  801.2  200.30   152.1 6.02e-12 ***
## Residuals   15   19.7    1.32                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sstable <- Anova(mod2, type = 2)

nice_table(sstable)

Sum Sq

Df

F value

Pr(>F)

801.20

4.00

152.13

0.00

19.75

15.00

knitr::kable(sstable, digits = 3)
Sum Sq Df F value Pr(>F)
Grupo 801.20 4 152.127 0
Residuals 19.75 15 NA NA
# F_crítico 
qf(0.05, 5-1, 20-5, lower.tail = F)
## [1] 3.055568
plot(mod2)

shapiro.test(mod2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  mod2$residuals
## W = 0.9656, p-value = 0.6606
boxplot(bienestar~ Grupo, data=anova3, col = c("yellow", "blue", "white","green", "black"), ylab = "")

leveneTest(bienestar~ Grupo, data=anova3)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  1.1111 0.3876
##       15
bartlett.test(bienestar~ Grupo, data=anova3 )
## 
##  Bartlett test of homogeneity of variances
## 
## data:  bienestar by Grupo
## Bartlett's K-squared = 2.6752, df = 4, p-value = 0.6136
oneway.test(bienestar~ Grupo, data=anova3, var.equal = TRUE)
## 
##  One-way analysis of means
## 
## data:  bienestar and Grupo
## F = 152.13, num df = 4, denom df = 15, p-value = 6.018e-12
multcompBoxplot(bienestar~ Grupo, data=anova3)

pairwise.t.test( anova3$bienestar, 
                 anova3$Grupo, 
                 p.adj = "bonferroni")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  anova3$bienestar and anova3$Grupo 
## 
##           1problema 2problema 3problema 4problema
## 2problema 0.00056   -         -         -        
## 3problema 1.3e-07   0.00056   -         -        
## 4problema 2.4e-09   1.4e-06   0.02150   -        
## control   3.7e-05   2.1e-08   1.3e-10   8.6e-12  
## 
## P value adjustment method: bonferroni
intervalos= TukeyHSD(mod2)
intervalos
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = bienestar ~ Grupo, data = anova3)
## 
## $Grupo
##                       diff        lwr        upr     p adj
## 2problema-1problema  -4.50  -7.005472 -1.9945278 0.0004606
## 3problema-1problema  -9.00 -11.505472 -6.4945278 0.0000001
## 4problema-1problema -12.00 -14.505472 -9.4945278 0.0000000
## control-1problema     5.75   3.244528  8.2554722 0.0000317
## 3problema-2problema  -4.50  -7.005472 -1.9945278 0.0004606
## 4problema-2problema  -7.50 -10.005472 -4.9945278 0.0000012
## control-2problema    10.25   7.744528 12.7554722 0.0000000
## 4problema-3problema  -3.00  -5.505472 -0.4945278 0.0156512
## control-3problema    14.75  12.244528 17.2554722 0.0000000
## control-4problema    17.75  15.244528 20.2554722 0.0000000
plot(intervalos)

Anova Factorial o de una dos vías

Supóngase un estudio clínico que analiza la eficacia de un medicamento teniendo en cuenta dos factores, el sexo (masculino y femenino) y la juventud (joven, adulto). Se quiere analizar si el efecto es diferente entre alguno de los niveles de cada variable por si sola o en combinación.

Este estudio implica comprobar si el efecto medio del fármaco es significativamente distinto entre alguno de los siguientes grupos: hombres, mujeres, jóvenes, adultos, hombres jóvenes, hombres adultos, mujeres jóvenes y mujeres adultas.

subject <- as.factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))
sex <- c("female", "male", "male", "female", "male", "male", "male", "female",
         "female", "male", "male", "male", "male", "female", "female", "female",
         "male", "female", "female", "male", "male", "female", "male", "male",
         "male", "male", "male", "male", "female", "male" )
age <- c("adult", "adult", "adult", "adult", "adult", "adult", "young", "young",
         "adult", "young", "young", "adult", "young", "young", "young", "adult",
         "young", "adult", "young", "young", "young", "young", "adult", "young",
         "young", "young", "young", "young", "young", "adult")
result <- c(7.1, 11.0, 5.8, 8.8, 8.6, 8.0, 3.0, 5.2, 3.4, 4.0, 5.3, 11.3, 4.6, 6.4,
            13.5, 4.7, 5.1, 7.3, 9.5, 5.4, 3.7, 6.2, 10.0, 1.7, 2.9, 3.2, 4.7, 4.9,
            9.8, 9.4)

datos <- data.frame(subject, sex, age, result)
head(datos, 4)
##   subject    sex   age result
## 1       1 female adult    7.1
## 2       2   male adult   11.0
## 3       3   male adult    5.8
## 4       4 female adult    8.8
skim(datos)
Data summary
Name datos
Number of rows 30
Number of columns 4
_______________________
Column type frequency:
character 2
factor 1
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
sex 0 1 4 6 0 2 0
age 0 1 5 5 0 2 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
subject 0 1 FALSE 30 1: 1, 2: 1, 3: 1, 4: 1

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
result 0 1 6.48 2.93 1.7 4.62 5.6 8.75 13.5 ▅▇▃▅▂
p1 <- ggplot(data = datos, mapping = aes(x = age, y = result)) +
      geom_boxplot() +
      theme_bw()
p2 <- ggplot(data = datos, mapping = aes(x = sex, y = result)) +
      geom_boxplot() +
      theme_bw()
p3 <- ggplot(data = datos, mapping = aes(x = age, y = result, colour = sex)) +
      geom_boxplot() +
      theme_bw()

grid.arrange(p1, p2, ncol = 2)

p3

ggplot(data = datos, aes(x = age, y = result, colour = sex, group = sex)) +
    stat_summary(fun = mean, geom = "point") +
    stat_summary(fun = mean, geom = "line") +
    labs(y  =  'mean (result)') + 
    theme_bw()

ggplot(data = datos, aes(x = sex, y = result, colour = age, group = age)) +
    stat_summary(fun = mean, geom = "point") +
    stat_summary(fun = mean, geom = "line") +
    labs(y  =  'mean (result)') + 
    theme_bw()

anova_2vias <- aov(formula = result ~ sex*age, data = datos)
summary(anova_2vias)
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## sex          1  16.08   16.08   4.038  0.0550 .  
## age          1  38.96   38.96   9.786  0.0043 ** 
## sex:age      1  89.61   89.61  22.509 6.6e-05 ***
## Residuals   26 103.51    3.98                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etaSquared(anova_2vias)
##             eta.sq eta.sq.part
## sex     0.04842176   0.1040130
## age     0.15699885   0.2734635
## sex:age 0.36110080   0.4640119
#install.packages("emmeans")
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
aov2.emm = emmeans(anova_2vias, specs=~sex*age)
#Este modelo es correcto cuando NO hay interacción
emmeans(aov2.emm, pairwise~sex)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  sex    emmean    SE df lower.CL upper.CL
##  female   7.35 0.604 26     6.10     8.59
##  male     6.60 0.474 26     5.62     7.57
## 
## Results are averaged over the levels of: age 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast      estimate    SE df t.ratio p.value
##  female - male    0.747 0.768 26   0.973  0.3396
## 
## Results are averaged over the levels of: age
emmeans(aov2.emm, pairwise~age)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  age   emmean    SE df lower.CL upper.CL
##  adult   7.71 0.584 26     6.51     8.91
##  young   6.24 0.499 26     5.21     7.26
## 
## Results are averaged over the levels of: sex 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast      estimate    SE df t.ratio p.value
##  adult - young     1.47 0.768 26   1.915  0.0665
## 
## Results are averaged over the levels of: sex
emmeans(aov2.emm, pairwise~sex*age)
## $emmeans
##  sex    age   emmean    SE df lower.CL upper.CL
##  female adult   6.26 0.892 26     4.43     8.09
##  male   adult   9.16 0.754 26     7.61    10.71
##  female young   8.43 0.815 26     6.76    10.11
##  male   young   4.04 0.576 26     2.86     5.23
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                    estimate    SE df t.ratio p.value
##  female adult - male adult     -2.897 1.170 26  -2.480  0.0870
##  female adult - female young   -2.173 1.210 26  -1.799  0.2966
##  female adult - male young      2.218 1.060 26   2.089  0.1833
##  male adult - female young      0.724 1.110 26   0.652  0.9139
##  male adult - male young        5.115 0.949 26   5.391  0.0001
##  female young - male young      4.392 0.998 26   4.402  0.0009
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

Ejemplo de Balluerka & Vergara, 2002

id <- as.factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                       18, 19, 20, 21, 22, 23, 24))

resultado <- c(7,9,10,8,8,11,10,6,13,15,11,10, 6,5,4,7,4,6,8,5,10,9,6,8)
desarrollo <- as.factor(c("precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío"))
comp_padres <- as.factor(c("a.mcomp", "a.mcomp", "a.mcomp", "a.mcomp", "b.pcomp", "b.pcomp", "b.pcomp", "b.pcomp","c.ambos", "c.ambos", "c.ambos", "c.ambos", "a.mcomp", "a.mcomp", "a.mcomp", "a.mcomp", "b.pcomp", "b.pcomp", "b.pcomp", "b.pcomp","c.ambos", "c.ambos", "c.ambos", "c.ambos"))

datos <- data.frame(id,desarrollo, comp_padres, resultado)
head(datos, 4)
##   id desarrollo comp_padres resultado
## 1  1     precoz     a.mcomp         7
## 2  2     precoz     a.mcomp         9
## 3  3     precoz     a.mcomp        10
## 4  4     precoz     a.mcomp         8
datos %>%
  group_by(desarrollo, comp_padres)%>%
  skim(resultado)
Data summary
Name Piped data
Number of rows 24
Number of columns 4
_______________________
Column type frequency:
numeric 1
________________________
Group variables desarrollo, comp_padres

Variable type: numeric

skim_variable desarrollo comp_padres n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
resultado precoz a.mcomp 0 1 8.50 1.29 7 7.75 8.5 9.25 10 ▇▇▁▇▇
resultado precoz b.pcomp 0 1 8.75 2.22 6 7.50 9.0 10.25 11 ▇▇▁▇▇
resultado precoz c.ambos 0 1 12.25 2.22 10 10.75 12.0 13.50 15 ▇▁▃▁▃
resultado tardío a.mcomp 0 1 5.50 1.29 4 4.75 5.5 6.25 7 ▇▇▁▇▇
resultado tardío b.pcomp 0 1 5.75 1.71 4 4.75 5.5 6.50 8 ▇▇▇▁▇
resultado tardío c.ambos 0 1 8.25 1.71 6 7.50 8.5 9.25 10 ▇▁▇▇▇
ggplot(data = datos, aes(x = desarrollo, y = resultado, colour = comp_padres, group = comp_padres)) +
    stat_summary(fun = mean, geom = "point") +
    stat_summary(fun = mean, geom = "line") +
    labs(y  =  'mean (result)') + 
    theme_bw()

ggplot(data = datos, aes(x = comp_padres, y = resultado, colour = desarrollo, group = desarrollo)) +
    stat_summary(fun = mean, geom = "point") +
    stat_summary(fun = mean, geom = "line") +
    labs(y  =  'mean (result)') + 
    theme_bw()

anova_simple1 <- aov(formula = resultado ~ comp_padres, data = datos)
summary(anova_simple1)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## comp_padres  2  52.33  26.167   4.396 0.0254 *
## Residuals   21 125.00   5.952                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_simple2 <- aov(formula = resultado ~ desarrollo, data = datos)
summary(anova_simple2)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## desarrollo   1  66.67   66.67   13.25 0.00144 **
## Residuals   22 110.67    5.03                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_factorial1 <- aov(formula = resultado ~ comp_padres + desarrollo, data = datos)
summary(anova_factorial1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## comp_padres  2  52.33   26.17   8.971 0.001656 ** 
## desarrollo   1  66.67   66.67  22.857 0.000114 ***
## Residuals   20  58.33    2.92                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_factorial2 <- aov(formula = resultado ~ comp_padres * desarrollo, data = datos)
summary(anova_factorial2)
##                        Df Sum Sq Mean Sq F value   Pr(>F)    
## comp_padres             2  52.33   26.17   8.263 0.002845 ** 
## desarrollo              1  66.67   66.67  21.053 0.000228 ***
## comp_padres:desarrollo  2   1.33    0.67   0.211 0.812124    
## Residuals              18  57.00    3.17                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#install.packages("emmeans")
library(emmeans)
aov2.emm = emmeans(anova_factorial2, specs=~comp_padres * desarrollo)
#Este modelo es correcto cuando NO hay interacción
emmeans(aov2.emm, pairwise~comp_padres)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  comp_padres emmean    SE df lower.CL upper.CL
##  a.mcomp       7.00 0.629 18     5.68     8.32
##  b.pcomp       7.25 0.629 18     5.93     8.57
##  c.ambos      10.25 0.629 18     8.93    11.57
## 
## Results are averaged over the levels of: desarrollo 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast          estimate   SE df t.ratio p.value
##  a.mcomp - b.pcomp    -0.25 0.89 18  -0.281  0.9575
##  a.mcomp - c.ambos    -3.25 0.89 18  -3.653  0.0049
##  b.pcomp - c.ambos    -3.00 0.89 18  -3.372  0.0091
## 
## Results are averaged over the levels of: desarrollo 
## P value adjustment: tukey method for comparing a family of 3 estimates
emmeans(aov2.emm, pairwise~desarrollo)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
##  desarrollo emmean    SE df lower.CL upper.CL
##  precoz       9.83 0.514 18     8.75    10.91
##  tardío       6.50 0.514 18     5.42     7.58
## 
## Results are averaged over the levels of: comp_padres 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast        estimate    SE df t.ratio p.value
##  precoz - tardío     3.33 0.726 18   4.588  0.0002
## 
## Results are averaged over the levels of: comp_padres
emmeans(aov2.emm, pairwise~desarrollo*comp_padres)
## $emmeans
##  desarrollo comp_padres emmean   SE df lower.CL upper.CL
##  precoz     a.mcomp       8.50 0.89 18     6.63    10.37
##  tardío     a.mcomp       5.50 0.89 18     3.63     7.37
##  precoz     b.pcomp       8.75 0.89 18     6.88    10.62
##  tardío     b.pcomp       5.75 0.89 18     3.88     7.62
##  precoz     c.ambos      12.25 0.89 18    10.38    14.12
##  tardío     c.ambos       8.25 0.89 18     6.38    10.12
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                        estimate   SE df t.ratio p.value
##  precoz a.mcomp - tardío a.mcomp     3.00 1.26 18   2.384  0.2133
##  precoz a.mcomp - precoz b.pcomp    -0.25 1.26 18  -0.199  0.9999
##  precoz a.mcomp - tardío b.pcomp     2.75 1.26 18   2.185  0.2913
##  precoz a.mcomp - precoz c.ambos    -3.75 1.26 18  -2.980  0.0735
##  precoz a.mcomp - tardío c.ambos     0.25 1.26 18   0.199  0.9999
##  tardío a.mcomp - precoz b.pcomp    -3.25 1.26 18  -2.583  0.1524
##  tardío a.mcomp - tardío b.pcomp    -0.25 1.26 18  -0.199  0.9999
##  tardío a.mcomp - precoz c.ambos    -6.75 1.26 18  -5.364  0.0005
##  tardío a.mcomp - tardío c.ambos    -2.75 1.26 18  -2.185  0.2913
##  precoz b.pcomp - tardío b.pcomp     3.00 1.26 18   2.384  0.2133
##  precoz b.pcomp - precoz c.ambos    -3.50 1.26 18  -2.782  0.1067
##  precoz b.pcomp - tardío c.ambos     0.50 1.26 18   0.397  0.9985
##  tardío b.pcomp - precoz c.ambos    -6.50 1.26 18  -5.166  0.0008
##  tardío b.pcomp - tardío c.ambos    -2.50 1.26 18  -1.987  0.3866
##  precoz c.ambos - tardío c.ambos     4.00 1.26 18   3.179  0.0499
## 
## P value adjustment: tukey method for comparing a family of 6 estimates

Anova Factorial (otro ejemplo)

Queremos estudiar la influencia que tienen el el sexo del profesor y el tipo de colegio sobre los sintomas el clima escolar. Para ello se mide con una escala de clima escolar (variable cuantitativa dependiente) tres tipos de colegios (municipal, PS y PP) y estudiantes (H y M).

clima <- c(15.29, 15.89, 16.02, 16.56, 15.46, 16.91, 16.99, 17.27, 16.85,
                 16.35, 17.23, 17.81, 17.74, 18.02, 18.37, 12.07, 12.42, 12.73,
                 13.02, 12.05, 12.92, 13.01, 12.21, 13.49, 14.01, 13.30, 12.82,
                 12.49, 13.55, 14.53)
sexo <- c(rep(c("Masculino", "Femenino"), c(15,15)))
colegio <- rep(c("a.M","b.PS", "c.PP"), each = 5, times = 2)
datos <- data.frame(sexo = sexo, colegio = as.factor(colegio),
                    clima = clima)
head(datos)
##        sexo colegio clima
## 1 Masculino     a.M 15.29
## 2 Masculino     a.M 15.89
## 3 Masculino     a.M 16.02
## 4 Masculino     a.M 16.56
## 5 Masculino     a.M 15.46
## 6 Masculino    b.PS 16.91
library(ggplot2)
library(gridExtra)

p1 <- ggplot(data = datos, aes(x = sexo, y = clima)) + 
      geom_boxplot() + theme_bw()
p2 <- ggplot(data = datos, aes(x = colegio, y = clima)) +
      geom_boxplot() + theme_bw()
p3 <- ggplot(data = datos, aes(x = sexo, y = clima, colour = colegio)) +
      geom_boxplot() + theme_bw()

grid.arrange(p1, p2, ncol = 2)

p3

interaction.plot(sexo, colegio, clima, data = datos, col = 1:3, type = "b")
## Warning in plot.window(...): "data" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "data" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in box(...): "data" is not a graphical parameter
## Warning in title(...): "data" is not a graphical parameter
## Warning in axis(...): "data" is not a graphical parameter

interaction.plot(colegio, sexo, clima, data = datos,col = 2:3, type = "b")
## Warning in plot.window(...): "data" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "data" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in box(...): "data" is not a graphical parameter
## Warning in title(...): "data" is not a graphical parameter
## Warning in axis(...): "data" is not a graphical parameter

ggplot(data = datos, aes(x = sexo, y = clima, colour = colegio,
                         group = colegio)) +
    stat_summary(fun = mean, geom = "point") +
    stat_summary(fun = mean, geom = "line") +
    labs(y  =  'mean (resistencia)') +
    theme_bw()

ggplot(data = datos, aes(x = colegio, y = clima, colour = sexo,
                         group = sexo)) +
    stat_summary(fun = mean, geom = "point") +
    stat_summary(fun = mean, geom = "line") +
    labs(y  =  'mean (resistencia)') +
    theme_bw()

anova <- aov(clima ~ colegio + sexo, data = datos)
summary(anova)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## colegio      2  10.41    5.21   15.53 3.65e-05 ***
## sexo         1 112.68  112.68  336.02  < 2e-16 ***
## Residuals   26   8.72    0.34                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova <- aov(clima ~ colegio * sexo, data = datos)
summary(anova)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## colegio       2  10.41    5.21  17.563 2.00e-05 ***
## sexo          1 112.68  112.68 380.082 3.19e-16 ***
## colegio:sexo  2   1.60    0.80   2.705   0.0873 .  
## Residuals    24   7.11    0.30                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#install.packages("lsr")

etaSquared(anova)
##                  eta.sq eta.sq.part
## colegio      0.07900327   0.5940887
## sexo         0.85485219   0.9406061
## colegio:sexo 0.01216553   0.1839235
plot(anova)