There are two downloads for this workshop. Use the links below to download the dataset and R script used:
If you do not already have R and R Studio installed, you can download them both here
Use the site below to navigate the R code we will be using in chunks. After the workshop, this site will be converted into multiple pages, each covering a different topic in an offline-workshop format.
setwd("C:/Users/Kari/OneDrive - Kansas State University/NI Core")
data = read.csv("ratCoffeeData.csv", stringsAsFactors = T)
attach(data)
View(data)
# Simple plots
hist(buttonpresses)
plot(coffee)
plot(sex)
hist(pelletsconsumed)
mean(buttonpresses)
#mean(buttonpresses, na.rm = T) # If there is missing data, na.rm = T will remove it
sd(buttonpresses)
#sd(buttonpresses, na.rm = T)
summary(buttonpresses)
#summary(buttonpresses, na.rm = T)
install.packages("ggplot2") # install ggplot2 if you do not already have it
library(ggplot2) # load in the ggplot2 library for plotting
# Simple plot of raw data points
ggplot(aes(x = pelletsconsumed, y = buttonpresses), data = data) + geom_point()
# Different types of correlations
cor(buttonpresses, pelletsconsumed) # note: <use = "complete.obs"> for NAs
cor(buttonpresses, pelletsconsumed, method='pearson') # identical to above
cor(buttonpresses, pelletsconsumed, method='spearman') # rank-ordered correlation and nonlinear correlations
# Plot of raw data with a line through it to visualize the trend
ggplot(aes(x = pelletsconsumed, y = buttonpresses), data = data) + geom_point() + geom_smooth(method=lm, se=TRUE)
# Visualize the two means we want to compare
ggplot(aes(x = sex, y = buttonpresses), data = data) + geom_bar(stat = "summary", color = "purple", fill = "purple")+
geom_errorbar(stat = "summary", width=0.25)
# Look at the individual means for Males and Females
males <- data [which(sex == "Male"),]
summary(males$buttonpresses)
females <- data [which(sex == "Female"),]
summary(females$buttonpresses)
# Run t-test
t.test(buttonpresses~ sex)
# Look at the outcome data. Does it look normal?
hist(data$buttonpresses)
# Run a normality test (these can be misleading if you have a lot of data -- eyeballing it is okay!)
shapiro.test(data$buttonpresses)
# Visualize the groups we want to compare
ggplot(aes(x = coffee, y = buttonpresses), data = data) + geom_bar(stat = "summary", color = "purple", fill = "purple")+
geom_errorbar(stat = "summary", width=0.25)
# Run the ANOVA and save it to an object
anova1 <- aov(buttonpresses ~ coffee)
# Get a summary of that anova object
summary(anova1)
# Post-hoc tests (can use different adjustments)
pairwise.t.test(buttonpresses, coffee, p.adj = "none") # aka 'LSD' adjustment
pairwise.t.test(buttonpresses, coffee, p.adj = "bonf")
pairwise.t.test(buttonpresses, coffee, p.adj = "fdr")
# or just default to Tukey
TukeyHSD(anova1)
# Run regression and save it to an object
regression <- lm(buttonpresses ~ coffee)
# Get a summary of the regression object
summary(regression)
# Get an ANOVA table for the regression output
anova(regression)
# Test a more complex model, adding in sex as a predictor
regression2 <- lm(buttonpresses ~ coffee + sex)
# Summary of more complex model
summary(regression2)
# Test an even more complex model,
# introducing an interaction between coffee and sex
regression3 <- lm(buttonpresses ~ coffee * sex)
# Summary of most complex model
summary(regression3)
# model comparisons
anova(regression, regression2, regression3)
# any significant differences? Should there be? Additional predictors useful or no?
# Get specific result values for regression 3
summary(regression3)$fstatistic
summary(regression3)$r.squared
summary(regression3)$adj.r.squared
summary(regression3)$sigma # Residual Standard Error
# Post-hoc tests
library(emmeans)
emmeans(regression3, ~coffee*sex) # all interactions
emmeans(regression3, ~coffee|sex) # coffee means grouped by sex
emmeans(regression3, ~sex|coffee) # sex means grouped by coffee
emmeans(regression3, pairwise ~coffee, adjust = "tukey") # compare coffee means
emmeans(regression3, pairwise ~coffee*sex, adjust = "tukey") # compare coffee and sex combination means
# Note: if you compare ALL combinations, the Tukey adjustment might be too strong.
emmeans(regression3, pairwise ~coffee|sex, adjust = "tukey") # compare coffee means grouped by sex
```
emmeans(regression3, pairwise ~sex|coffee, adjust = "tukey") # compare sex means grouped by coffee
# Get the estimated marginal means in a dataframe of its own for plotting
toplot <- as.data.frame(emmeans(regression3, ~coffee*sex))
# Check out the first couple lines of this dataframe
head(toplot)
# Create the plot
ggplot(data = toplot, aes(x = coffee, y = emmean, col = sex, fill = sex)) + geom_bar(stat = "identity") + facet_wrap(~sex) +
geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), col = "black", width=0.25) + ylab("Button Presses") + xlab("Coffee")
Here are some handy resources to check out!
t-tests - Datacamp Note: this article is free, but we also get free Datacamp access through the Psych department - email me at karipayne@ksu.edu if you would like access!