library(skimr)
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(texreg)
## Version: 1.39.4
## Date: 2024-07-23
## Author: Philip Leifeld (University of Manchester)
##
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rempsyc)
## Suggested APA citation: Thériault, R. (2023). rempsyc: Convenience functions for psychology.
## Journal of Open Source Software, 8(87), 5466. https://doi.org/10.21105/joss.05466
library(flextable)
library(rio)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(lsr)
#install.packages("flextable")
#install.packages("multcompView")
#install.packages("rempsyc")
#getwd()
anova1=import("./anova_I.xlsx", sep=";")
names(anova1)
## [1] "ID" "bienestar" "Grupo"
anova2=read.csv("./anova_sueno.csv", sep=";")
names(anova2)
## [1] "id" "vd" "grupo"
anova1 %>%
skim(bienestar)
| Name | Piped data |
| Number of rows | 16 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| bienestar | 0 | 1 | 5.44 | 2.85 | 2 | 3 | 4.5 | 8 | 10 | ▇▃▁▃▃ |
anova1 %>%
group_by(Grupo) %>%
skim(bienestar)
| Name | Piped data |
| Number of rows | 16 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Grupo |
Variable type: numeric
| skim_variable | Grupo | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| bienestar | control | 0 | 1 | 3.00 | 0.82 | 2 | 2.75 | 3.0 | 3.25 | 4 | ▃▁▇▁▃ |
| bienestar | gato | 0 | 1 | 5.25 | 1.71 | 3 | 4.50 | 5.5 | 6.25 | 7 | ▇▁▇▇▇ |
| bienestar | otro | 0 | 1 | 4.25 | 2.63 | 2 | 2.75 | 3.5 | 5.00 | 8 | ▇▃▁▁▃ |
| bienestar | perro | 0 | 1 | 9.25 | 0.96 | 8 | 8.75 | 9.5 | 10.00 | 10 | ▃▁▃▁▇ |
ggplot(anova1, aes(y=bienestar, x=Grupo)) +
geom_boxplot()
# Modelo anova
mod1= aov(bienestar ~ Grupo, data=anova1)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Grupo 3 87.69 29.229 10.24 0.00125 **
## Residuals 12 34.25 2.854
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sstable <- Anova(mod1, type = 2)
nice_table(sstable)
Sum Sq | Df | F value | Pr(>F) |
|---|---|---|---|
87.69 | 3.00 | 10.24 | 0.00 |
34.25 | 12.00 |
knitr::kable(sstable, digits = 3)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| Grupo | 87.688 | 3 | 10.241 | 0.001 |
| Residuals | 34.250 | 12 | NA | NA |
# F_crítico
qf(0.05, 4-1, 16-4, lower.tail = F)
## [1] 3.490295
Comparar F observado(10.24) vs F crítico(3.49)
anova2 %>%
skim(vd)
| Name | Piped data |
| Number of rows | 60 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| vd | 0 | 1 | 4.37 | 2.13 | 1 | 3 | 4 | 6 | 10 | ▅▇▆▂▁ |
anova2 %>%
group_by(grupo) %>%
skim(vd)
| Name | Piped data |
| Number of rows | 60 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | grupo |
Variable type: numeric
| skim_variable | grupo | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| vd | C | 0 | 1 | 3.00 | 1.51 | 1 | 2.0 | 3 | 3.5 | 7 | ▇▅▂▁▁ |
| vd | EN | 0 | 1 | 3.27 | 1.44 | 1 | 2.5 | 3 | 4.0 | 6 | ▆▇▅▃▂ |
| vd | EP | 0 | 1 | 4.73 | 1.49 | 2 | 4.0 | 5 | 6.0 | 7 | ▆▇▆▆▃ |
| vd | IS | 0 | 1 | 6.47 | 2.10 | 3 | 5.0 | 6 | 8.0 | 10 | ▃▅▇▃▅ |
ggplot(anova2, aes(y=vd, x=grupo)) +
geom_boxplot()
# Modelo anova
mod1= aov(vd ~ grupo, data=anova2)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## grupo 3 114.3 38.11 13.9 6.93e-07 ***
## Residuals 56 153.6 2.74
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sstable <- Anova(mod1, type = 2)
nice_table(sstable)
Sum Sq | Df | F value | Pr(>F) |
|---|---|---|---|
114.33 | 3.00 | 13.89 | 0.00 |
153.60 | 56.00 |
knitr::kable(sstable, digits = 3)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| grupo | 114.333 | 3 | 13.895 | 0 |
| Residuals | 153.600 | 56 | NA | NA |
# F_crítico
qf(0.05, 3-1, 60-4, lower.tail = F)
## [1] 3.161861
Comparar F observado(13.89) vs F crítico(3.16)
plot(mod1)
#histogram(mod1$residuals)
ggplot(anova2) +
geom_density(aes(x=vd))+
geom_density(aes(vd, group=grupo, color=grupo))
shapiro.test(mod1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1$residuals
## W = 0.98784, p-value = 0.814
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "C"))
##
## Shapiro-Wilk normality test
##
## data: subset(mod1$residuals, subset = anova2$grupo == "C")
## W = 0.8526, p-value = 0.01891
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "EN"))
##
## Shapiro-Wilk normality test
##
## data: subset(mod1$residuals, subset = anova2$grupo == "EN")
## W = 0.95073, p-value = 0.536
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "EP"))
##
## Shapiro-Wilk normality test
##
## data: subset(mod1$residuals, subset = anova2$grupo == "EP")
## W = 0.951, p-value = 0.5403
shapiro.test(subset(mod1$residuals, subset = anova2$grupo == "IS"))
##
## Shapiro-Wilk normality test
##
## data: subset(mod1$residuals, subset = anova2$grupo == "IS")
## W = 0.95742, p-value = 0.6476
boxplot(vd~ grupo, data=anova2, col = c("yellow", "blue", "white","green"), ylab = "actitud hacia el lavado de manos")
leveneTest(vd ~ grupo, data=anova2)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.1061 0.3544
## 56
bartlett.test(vd ~ grupo, data = anova2 )
##
## Bartlett test of homogeneity of variances
##
## data: vd by grupo
## Bartlett's K-squared = 2.8121, df = 3, p-value = 0.4215
oneway.test(vd ~ grupo, data=anova2, var.equal = TRUE)
##
## One-way analysis of means
##
## data: vd and grupo
## F = 13.895, num df = 3, denom df = 56, p-value = 6.931e-07
Si hemos detectado diferencias significativas entre los tipos de sensibilizaciones. ¿Sería posible saber cuáles son los grupos que generan estas diferencias?
pairwise.t.test( anova2$vd,
anova2$grupo,
p.adj = "bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: anova2$vd and anova2$grupo
##
## C EN EP
## EN 1.000 - -
## EP 0.035 0.111 -
## IS 2.5e-06 1.3e-05 0.035
##
## P value adjustment method: bonferroni
intervalos= TukeyHSD(mod1)
intervalos
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = vd ~ grupo, data = anova2)
##
## $grupo
## diff lwr upr p adj
## EN-C 0.2666667 -1.3346251 1.867958 0.9710793
## EP-C 1.7333333 0.1320416 3.334625 0.0289845
## IS-C 3.4666667 1.8653749 5.067958 0.0000024
## EP-EN 1.4666667 -0.1346251 3.067958 0.0839097
## IS-EN 3.2000000 1.5987083 4.801292 0.0000122
## IS-EP 1.7333333 0.1320416 3.334625 0.0289845
plot(intervalos)
library(multcompView)
multcompBoxplot(vd~ grupo, data=anova2, compFn = "TukeyHSD")
table(anova2$grupo)
summary.lm(mod1)
means= emmeans(mod1, specs =~grupo)
Contraste1 = list(control_vs_todos = c(-3,1,1,1))
contrast(means, Contraste1)
Contraste2 = list(EN_vs_EPIS = c(0,-2,1,1))
contrast(means, Contraste2)
Contraste3 = list(EN_vs_EPIS = c(0,0,-1,1))
contrast(means, Contraste3)
contrast1 <- c(-3, 1, 1, 1)
contrast2 <- c(0,-2, 1, 1)
contrast3 <- c(0, 0, -1, 1)
str(anova2)
## 'data.frame': 60 obs. of 3 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ vd : int 4 5 7 4 3 5 6 2 6 5 ...
## $ grupo: chr "EP" "EP" "EP" "EP" ...
View(anova2)
anova2$grupof= as.factor(anova2$grupo)
contrasts(anova2$grupof) <- cbind(contrast1, contrast2, contrast3)
anova2$grupof
## [1] EP EP EP EP EP EP EP EP EP EP EP EP EP EP EP EN EN EN EN EN EN EN EN EN EN
## [26] EN EN EN EN EN IS IS IS IS IS IS IS IS IS IS IS IS IS IS IS C C C C C
## [51] C C C C C C C C C C
## attr(,"contrasts")
## contrast1 contrast2 contrast3
## C -3 0 0
## EN 1 -2 0
## EP 1 1 -1
## IS 1 1 1
## Levels: C EN EP IS
planificado <- aov(vd ~ grupof, data=anova2)
summary.lm(planificado)
##
## Call:
## aov(formula = vd ~ grupof, data = anova2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4667 -1.0000 -0.2667 1.0667 4.0000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3667 0.2138 20.423 < 2e-16 ***
## grupofcontrast1 0.4556 0.1234 3.690 0.000509 ***
## grupofcontrast2 0.7778 0.1746 4.455 4.06e-05 ***
## grupofcontrast3 0.8667 0.3024 2.866 0.005842 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.656 on 56 degrees of freedom
## Multiple R-squared: 0.4267, Adjusted R-squared: 0.396
## F-statistic: 13.89 on 3 and 56 DF, p-value: 6.931e-07
anova3=read.csv("./autoef.csv", sep=";")
names(anova2)
## [1] "id" "vd" "grupo" "grupof"
anova3 %>%
skim(bienestar)
| Name | Piped data |
| Number of rows | 20 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| bienestar | 0 | 1 | 15.05 | 6.57 | 6 | 9.75 | 14.5 | 19.25 | 27 | ▇▃▃▃▅ |
anova3 %>%
group_by(Grupo) %>%
skim(bienestar)
| Name | Piped data |
| Number of rows | 20 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Grupo |
Variable type: numeric
| skim_variable | Grupo | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| bienestar | 1problema | 0 | 1 | 19.00 | 0.82 | 18 | 18.75 | 19.0 | 19.25 | 20 | ▃▁▇▁▃ |
| bienestar | 2problema | 0 | 1 | 14.50 | 1.29 | 13 | 13.75 | 14.5 | 15.25 | 16 | ▇▇▁▇▇ |
| bienestar | 3problema | 0 | 1 | 10.00 | 0.82 | 9 | 9.75 | 10.0 | 10.25 | 11 | ▃▁▇▁▃ |
| bienestar | 4problema | 0 | 1 | 7.00 | 0.82 | 6 | 6.75 | 7.0 | 7.25 | 8 | ▃▁▇▁▃ |
| bienestar | control | 0 | 1 | 24.75 | 1.71 | 23 | 23.75 | 24.5 | 25.50 | 27 | ▇▇▇▁▇ |
ggplot(anova3, aes(y=bienestar, x=Grupo)) +
geom_boxplot()
mod2= aov(bienestar ~ Grupo, data=anova3)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Grupo 4 801.2 200.30 152.1 6.02e-12 ***
## Residuals 15 19.7 1.32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sstable <- Anova(mod2, type = 2)
nice_table(sstable)
Sum Sq | Df | F value | Pr(>F) |
|---|---|---|---|
801.20 | 4.00 | 152.13 | 0.00 |
19.75 | 15.00 |
knitr::kable(sstable, digits = 3)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| Grupo | 801.20 | 4 | 152.127 | 0 |
| Residuals | 19.75 | 15 | NA | NA |
# F_crítico
qf(0.05, 5-1, 20-5, lower.tail = F)
## [1] 3.055568
plot(mod2)
shapiro.test(mod2$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod2$residuals
## W = 0.9656, p-value = 0.6606
boxplot(bienestar~ Grupo, data=anova3, col = c("yellow", "blue", "white","green", "black"), ylab = "")
leveneTest(bienestar~ Grupo, data=anova3)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.1111 0.3876
## 15
bartlett.test(bienestar~ Grupo, data=anova3 )
##
## Bartlett test of homogeneity of variances
##
## data: bienestar by Grupo
## Bartlett's K-squared = 2.6752, df = 4, p-value = 0.6136
oneway.test(bienestar~ Grupo, data=anova3, var.equal = TRUE)
##
## One-way analysis of means
##
## data: bienestar and Grupo
## F = 152.13, num df = 4, denom df = 15, p-value = 6.018e-12
multcompBoxplot(bienestar~ Grupo, data=anova3)
pairwise.t.test( anova3$bienestar,
anova3$Grupo,
p.adj = "bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: anova3$bienestar and anova3$Grupo
##
## 1problema 2problema 3problema 4problema
## 2problema 0.00056 - - -
## 3problema 1.3e-07 0.00056 - -
## 4problema 2.4e-09 1.4e-06 0.02150 -
## control 3.7e-05 2.1e-08 1.3e-10 8.6e-12
##
## P value adjustment method: bonferroni
intervalos= TukeyHSD(mod2)
intervalos
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bienestar ~ Grupo, data = anova3)
##
## $Grupo
## diff lwr upr p adj
## 2problema-1problema -4.50 -7.005472 -1.9945278 0.0004606
## 3problema-1problema -9.00 -11.505472 -6.4945278 0.0000001
## 4problema-1problema -12.00 -14.505472 -9.4945278 0.0000000
## control-1problema 5.75 3.244528 8.2554722 0.0000317
## 3problema-2problema -4.50 -7.005472 -1.9945278 0.0004606
## 4problema-2problema -7.50 -10.005472 -4.9945278 0.0000012
## control-2problema 10.25 7.744528 12.7554722 0.0000000
## 4problema-3problema -3.00 -5.505472 -0.4945278 0.0156512
## control-3problema 14.75 12.244528 17.2554722 0.0000000
## control-4problema 17.75 15.244528 20.2554722 0.0000000
plot(intervalos)
Supóngase un estudio clínico que analiza la eficacia de un medicamento teniendo en cuenta dos factores, el sexo (masculino y femenino) y la juventud (joven, adulto). Se quiere analizar si el efecto es diferente entre alguno de los niveles de cada variable por si sola o en combinación.
Este estudio implica comprobar si el efecto medio del fármaco es significativamente distinto entre alguno de los siguientes grupos: hombres, mujeres, jóvenes, adultos, hombres jóvenes, hombres adultos, mujeres jóvenes y mujeres adultas.
subject <- as.factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))
sex <- c("female", "male", "male", "female", "male", "male", "male", "female",
"female", "male", "male", "male", "male", "female", "female", "female",
"male", "female", "female", "male", "male", "female", "male", "male",
"male", "male", "male", "male", "female", "male" )
age <- c("adult", "adult", "adult", "adult", "adult", "adult", "young", "young",
"adult", "young", "young", "adult", "young", "young", "young", "adult",
"young", "adult", "young", "young", "young", "young", "adult", "young",
"young", "young", "young", "young", "young", "adult")
result <- c(7.1, 11.0, 5.8, 8.8, 8.6, 8.0, 3.0, 5.2, 3.4, 4.0, 5.3, 11.3, 4.6, 6.4,
13.5, 4.7, 5.1, 7.3, 9.5, 5.4, 3.7, 6.2, 10.0, 1.7, 2.9, 3.2, 4.7, 4.9,
9.8, 9.4)
datos <- data.frame(subject, sex, age, result)
head(datos, 4)
## subject sex age result
## 1 1 female adult 7.1
## 2 2 male adult 11.0
## 3 3 male adult 5.8
## 4 4 female adult 8.8
skim(datos)
| Name | datos |
| Number of rows | 30 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| factor | 1 |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| sex | 0 | 1 | 4 | 6 | 0 | 2 | 0 |
| age | 0 | 1 | 5 | 5 | 0 | 2 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| subject | 0 | 1 | FALSE | 30 | 1: 1, 2: 1, 3: 1, 4: 1 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| result | 0 | 1 | 6.48 | 2.93 | 1.7 | 4.62 | 5.6 | 8.75 | 13.5 | ▅▇▃▅▂ |
p1 <- ggplot(data = datos, mapping = aes(x = age, y = result)) +
geom_boxplot() +
theme_bw()
p2 <- ggplot(data = datos, mapping = aes(x = sex, y = result)) +
geom_boxplot() +
theme_bw()
p3 <- ggplot(data = datos, mapping = aes(x = age, y = result, colour = sex)) +
geom_boxplot() +
theme_bw()
grid.arrange(p1, p2, ncol = 2)
p3
ggplot(data = datos, aes(x = age, y = result, colour = sex, group = sex)) +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun = mean, geom = "line") +
labs(y = 'mean (result)') +
theme_bw()
ggplot(data = datos, aes(x = sex, y = result, colour = age, group = age)) +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun = mean, geom = "line") +
labs(y = 'mean (result)') +
theme_bw()
anova_2vias <- aov(formula = result ~ sex*age, data = datos)
summary(anova_2vias)
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 16.08 16.08 4.038 0.0550 .
## age 1 38.96 38.96 9.786 0.0043 **
## sex:age 1 89.61 89.61 22.509 6.6e-05 ***
## Residuals 26 103.51 3.98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etaSquared(anova_2vias)
## eta.sq eta.sq.part
## sex 0.04842176 0.1040130
## age 0.15699885 0.2734635
## sex:age 0.36110080 0.4640119
#install.packages("emmeans")
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
aov2.emm = emmeans(anova_2vias, specs=~sex*age)
#Este modelo es correcto cuando NO hay interacción
emmeans(aov2.emm, pairwise~sex)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## sex emmean SE df lower.CL upper.CL
## female 7.35 0.604 26 6.10 8.59
## male 6.60 0.474 26 5.62 7.57
##
## Results are averaged over the levels of: age
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## female - male 0.747 0.768 26 0.973 0.3396
##
## Results are averaged over the levels of: age
emmeans(aov2.emm, pairwise~age)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## age emmean SE df lower.CL upper.CL
## adult 7.71 0.584 26 6.51 8.91
## young 6.24 0.499 26 5.21 7.26
##
## Results are averaged over the levels of: sex
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## adult - young 1.47 0.768 26 1.915 0.0665
##
## Results are averaged over the levels of: sex
emmeans(aov2.emm, pairwise~sex*age)
## $emmeans
## sex age emmean SE df lower.CL upper.CL
## female adult 6.26 0.892 26 4.43 8.09
## male adult 9.16 0.754 26 7.61 10.71
## female young 8.43 0.815 26 6.76 10.11
## male young 4.04 0.576 26 2.86 5.23
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## female adult - male adult -2.897 1.170 26 -2.480 0.0870
## female adult - female young -2.173 1.210 26 -1.799 0.2966
## female adult - male young 2.218 1.060 26 2.089 0.1833
## male adult - female young 0.724 1.110 26 0.652 0.9139
## male adult - male young 5.115 0.949 26 5.391 0.0001
## female young - male young 4.392 0.998 26 4.402 0.0009
##
## P value adjustment: tukey method for comparing a family of 4 estimates
id <- as.factor(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24))
resultado <- c(7,9,10,8,8,11,10,6,13,15,11,10, 6,5,4,7,4,6,8,5,10,9,6,8)
desarrollo <- as.factor(c("precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "precoz", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío", "tardío"))
comp_padres <- as.factor(c("a.mcomp", "a.mcomp", "a.mcomp", "a.mcomp", "b.pcomp", "b.pcomp", "b.pcomp", "b.pcomp","c.ambos", "c.ambos", "c.ambos", "c.ambos", "a.mcomp", "a.mcomp", "a.mcomp", "a.mcomp", "b.pcomp", "b.pcomp", "b.pcomp", "b.pcomp","c.ambos", "c.ambos", "c.ambos", "c.ambos"))
datos <- data.frame(id,desarrollo, comp_padres, resultado)
head(datos, 4)
## id desarrollo comp_padres resultado
## 1 1 precoz a.mcomp 7
## 2 2 precoz a.mcomp 9
## 3 3 precoz a.mcomp 10
## 4 4 precoz a.mcomp 8
datos %>%
group_by(desarrollo, comp_padres)%>%
skim(resultado)
| Name | Piped data |
| Number of rows | 24 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | desarrollo, comp_padres |
Variable type: numeric
| skim_variable | desarrollo | comp_padres | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| resultado | precoz | a.mcomp | 0 | 1 | 8.50 | 1.29 | 7 | 7.75 | 8.5 | 9.25 | 10 | ▇▇▁▇▇ |
| resultado | precoz | b.pcomp | 0 | 1 | 8.75 | 2.22 | 6 | 7.50 | 9.0 | 10.25 | 11 | ▇▇▁▇▇ |
| resultado | precoz | c.ambos | 0 | 1 | 12.25 | 2.22 | 10 | 10.75 | 12.0 | 13.50 | 15 | ▇▁▃▁▃ |
| resultado | tardío | a.mcomp | 0 | 1 | 5.50 | 1.29 | 4 | 4.75 | 5.5 | 6.25 | 7 | ▇▇▁▇▇ |
| resultado | tardío | b.pcomp | 0 | 1 | 5.75 | 1.71 | 4 | 4.75 | 5.5 | 6.50 | 8 | ▇▇▇▁▇ |
| resultado | tardío | c.ambos | 0 | 1 | 8.25 | 1.71 | 6 | 7.50 | 8.5 | 9.25 | 10 | ▇▁▇▇▇ |
ggplot(data = datos, aes(x = desarrollo, y = resultado, colour = comp_padres, group = comp_padres)) +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun = mean, geom = "line") +
labs(y = 'mean (result)') +
theme_bw()
ggplot(data = datos, aes(x = comp_padres, y = resultado, colour = desarrollo, group = desarrollo)) +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun = mean, geom = "line") +
labs(y = 'mean (result)') +
theme_bw()
anova_simple1 <- aov(formula = resultado ~ comp_padres, data = datos)
summary(anova_simple1)
## Df Sum Sq Mean Sq F value Pr(>F)
## comp_padres 2 52.33 26.167 4.396 0.0254 *
## Residuals 21 125.00 5.952
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_simple2 <- aov(formula = resultado ~ desarrollo, data = datos)
summary(anova_simple2)
## Df Sum Sq Mean Sq F value Pr(>F)
## desarrollo 1 66.67 66.67 13.25 0.00144 **
## Residuals 22 110.67 5.03
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_factorial1 <- aov(formula = resultado ~ comp_padres + desarrollo, data = datos)
summary(anova_factorial1)
## Df Sum Sq Mean Sq F value Pr(>F)
## comp_padres 2 52.33 26.17 8.971 0.001656 **
## desarrollo 1 66.67 66.67 22.857 0.000114 ***
## Residuals 20 58.33 2.92
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_factorial2 <- aov(formula = resultado ~ comp_padres * desarrollo, data = datos)
summary(anova_factorial2)
## Df Sum Sq Mean Sq F value Pr(>F)
## comp_padres 2 52.33 26.17 8.263 0.002845 **
## desarrollo 1 66.67 66.67 21.053 0.000228 ***
## comp_padres:desarrollo 2 1.33 0.67 0.211 0.812124
## Residuals 18 57.00 3.17
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#install.packages("emmeans")
library(emmeans)
aov2.emm = emmeans(anova_factorial2, specs=~comp_padres * desarrollo)
#Este modelo es correcto cuando NO hay interacción
emmeans(aov2.emm, pairwise~comp_padres)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## comp_padres emmean SE df lower.CL upper.CL
## a.mcomp 7.00 0.629 18 5.68 8.32
## b.pcomp 7.25 0.629 18 5.93 8.57
## c.ambos 10.25 0.629 18 8.93 11.57
##
## Results are averaged over the levels of: desarrollo
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## a.mcomp - b.pcomp -0.25 0.89 18 -0.281 0.9575
## a.mcomp - c.ambos -3.25 0.89 18 -3.653 0.0049
## b.pcomp - c.ambos -3.00 0.89 18 -3.372 0.0091
##
## Results are averaged over the levels of: desarrollo
## P value adjustment: tukey method for comparing a family of 3 estimates
emmeans(aov2.emm, pairwise~desarrollo)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## desarrollo emmean SE df lower.CL upper.CL
## precoz 9.83 0.514 18 8.75 10.91
## tardío 6.50 0.514 18 5.42 7.58
##
## Results are averaged over the levels of: comp_padres
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## precoz - tardío 3.33 0.726 18 4.588 0.0002
##
## Results are averaged over the levels of: comp_padres
emmeans(aov2.emm, pairwise~desarrollo*comp_padres)
## $emmeans
## desarrollo comp_padres emmean SE df lower.CL upper.CL
## precoz a.mcomp 8.50 0.89 18 6.63 10.37
## tardío a.mcomp 5.50 0.89 18 3.63 7.37
## precoz b.pcomp 8.75 0.89 18 6.88 10.62
## tardío b.pcomp 5.75 0.89 18 3.88 7.62
## precoz c.ambos 12.25 0.89 18 10.38 14.12
## tardío c.ambos 8.25 0.89 18 6.38 10.12
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## precoz a.mcomp - tardío a.mcomp 3.00 1.26 18 2.384 0.2133
## precoz a.mcomp - precoz b.pcomp -0.25 1.26 18 -0.199 0.9999
## precoz a.mcomp - tardío b.pcomp 2.75 1.26 18 2.185 0.2913
## precoz a.mcomp - precoz c.ambos -3.75 1.26 18 -2.980 0.0735
## precoz a.mcomp - tardío c.ambos 0.25 1.26 18 0.199 0.9999
## tardío a.mcomp - precoz b.pcomp -3.25 1.26 18 -2.583 0.1524
## tardío a.mcomp - tardío b.pcomp -0.25 1.26 18 -0.199 0.9999
## tardío a.mcomp - precoz c.ambos -6.75 1.26 18 -5.364 0.0005
## tardío a.mcomp - tardío c.ambos -2.75 1.26 18 -2.185 0.2913
## precoz b.pcomp - tardío b.pcomp 3.00 1.26 18 2.384 0.2133
## precoz b.pcomp - precoz c.ambos -3.50 1.26 18 -2.782 0.1067
## precoz b.pcomp - tardío c.ambos 0.50 1.26 18 0.397 0.9985
## tardío b.pcomp - precoz c.ambos -6.50 1.26 18 -5.166 0.0008
## tardío b.pcomp - tardío c.ambos -2.50 1.26 18 -1.987 0.3866
## precoz c.ambos - tardío c.ambos 4.00 1.26 18 3.179 0.0499
##
## P value adjustment: tukey method for comparing a family of 6 estimates
Queremos estudiar la influencia que tienen el el sexo del profesor y el tipo de colegio sobre los sintomas el clima escolar. Para ello se mide con una escala de clima escolar (variable cuantitativa dependiente) tres tipos de colegios (municipal, PS y PP) y estudiantes (H y M).
clima <- c(15.29, 15.89, 16.02, 16.56, 15.46, 16.91, 16.99, 17.27, 16.85,
16.35, 17.23, 17.81, 17.74, 18.02, 18.37, 12.07, 12.42, 12.73,
13.02, 12.05, 12.92, 13.01, 12.21, 13.49, 14.01, 13.30, 12.82,
12.49, 13.55, 14.53)
sexo <- c(rep(c("Masculino", "Femenino"), c(15,15)))
colegio <- rep(c("a.M","b.PS", "c.PP"), each = 5, times = 2)
datos <- data.frame(sexo = sexo, colegio = as.factor(colegio),
clima = clima)
head(datos)
## sexo colegio clima
## 1 Masculino a.M 15.29
## 2 Masculino a.M 15.89
## 3 Masculino a.M 16.02
## 4 Masculino a.M 16.56
## 5 Masculino a.M 15.46
## 6 Masculino b.PS 16.91
library(ggplot2)
library(gridExtra)
p1 <- ggplot(data = datos, aes(x = sexo, y = clima)) +
geom_boxplot() + theme_bw()
p2 <- ggplot(data = datos, aes(x = colegio, y = clima)) +
geom_boxplot() + theme_bw()
p3 <- ggplot(data = datos, aes(x = sexo, y = clima, colour = colegio)) +
geom_boxplot() + theme_bw()
grid.arrange(p1, p2, ncol = 2)
p3
interaction.plot(sexo, colegio, clima, data = datos, col = 1:3, type = "b")
## Warning in plot.window(...): "data" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "data" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in box(...): "data" is not a graphical parameter
## Warning in title(...): "data" is not a graphical parameter
## Warning in axis(...): "data" is not a graphical parameter
interaction.plot(colegio, sexo, clima, data = datos,col = 2:3, type = "b")
## Warning in plot.window(...): "data" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "data" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in box(...): "data" is not a graphical parameter
## Warning in title(...): "data" is not a graphical parameter
## Warning in axis(...): "data" is not a graphical parameter
ggplot(data = datos, aes(x = sexo, y = clima, colour = colegio,
group = colegio)) +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun = mean, geom = "line") +
labs(y = 'mean (resistencia)') +
theme_bw()
ggplot(data = datos, aes(x = colegio, y = clima, colour = sexo,
group = sexo)) +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun = mean, geom = "line") +
labs(y = 'mean (resistencia)') +
theme_bw()
anova <- aov(clima ~ colegio + sexo, data = datos)
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## colegio 2 10.41 5.21 15.53 3.65e-05 ***
## sexo 1 112.68 112.68 336.02 < 2e-16 ***
## Residuals 26 8.72 0.34
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova <- aov(clima ~ colegio * sexo, data = datos)
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## colegio 2 10.41 5.21 17.563 2.00e-05 ***
## sexo 1 112.68 112.68 380.082 3.19e-16 ***
## colegio:sexo 2 1.60 0.80 2.705 0.0873 .
## Residuals 24 7.11 0.30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#install.packages("lsr")
etaSquared(anova)
## eta.sq eta.sq.part
## colegio 0.07900327 0.5940887
## sexo 0.85485219 0.9406061
## colegio:sexo 0.01216553 0.1839235
plot(anova)