BIOL 297: Schedule for week of May 11-15
Assignments:
Complete all labs by Tuesday, May 12 @ 11:59 PM HST
Finish your Final Project! R Script and short blog-style report are due by classtime (Thursday, May 14 @ 12 PM HST). See this page and the Laulima announcement for complete details on what to turn in.
Schedule
Thursday, May 14 - Presenations
- Final Project presentations @ 12 PM HST. Join the Google Meeting using this link: meet.google.com/xxj-wayo-kxf
Code demo (Correlation)
Watch video of code demo here.
# Demonstrate Pearson's Product Moment correlation test
# Load libraries
library(dplyr)
library(ggplot2)
library(scales)
# Mammals dataset from Lab 13 (Data/mammals.csv)
mammals <- read.csv("data/mammals.csv")
mammals$log_body_mass_kg <- log(mammals$body_mass_kg)
mammals$log_brain_mass_g <- log(mammals$brain_mass_g)
ggplot(mammals, aes(body_mass_kg, brain_mass_g)) +
geom_point() +
scale_x_log10(label = label_number(accuracy = 1)) +
scale_y_log10(label = label_number(accuracy = 1)) +
xlab("Body mass [kg] (log-scale)") +
ylab("Brain mass [g] (log-scale)") +
theme_bw()
# 1. State H_0 and H_A ----
# H_0: The population correlation coefficient (rho) between body mass and brain mass is exactly zero
rho_0 <- 0 # null hypothesis
# H_A: The population correlation coefficient (rho) between body mass and brain mass is not zero
# 2. Calculate a test statistic ----
X <- mammals$log_body_mass_kg # explanatory variable
Y <- mammals$log_brain_mass_g # response variable
n <- length(X)
Xbar <- mean(X)
Ybar <- mean(Y)
r <- sum((X - Xbar) * (Y - Ybar)) / (sqrt(sum((X - Xbar) ^ 2)) * sqrt(sum((Y - Ybar) ^ 2)))
SE_r <- sqrt((1 - r ^ 2) / (n - 2))
test_stat <- r / SE_r
# 3. Generate the null distribution
sampling_dist <- data.frame(Y = seq(-4, 27, 0.1))
sampling_dist$probability <- dt(sampling_dist$Y, df = n - 2)
head(sampling_dist)
sampling_dist_plot <- ggplot(sampling_dist, aes(Y, probability)) +
geom_area(fill = "tomato", alpha = 0.5) +
geom_line() +
xlab(expression(italic(t))) +
ylab("Probability density") +
theme_bw() +
theme(
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)
sampling_dist_plot
sampling_dist_plot +
geom_vline(xintercept = test_stat, size = 2)
head(sampling_dist)
# 4. Find a critical value at specified alpha ----
alpha <- 0.05
critical_value <- qt(
p = alpha / 2,
df = n - 2,
lower.tail = FALSE
)
critical_value
sampling_dist_plot +
geom_vline(xintercept = critical_value * c(-1, 1), size = 2)
# 5. Find the P-value
P_value <- 2 * pt(
q = abs(test_stat),
df = n - 2,
lower.tail = FALSE
)
P_value
# Decide ----
P_value < alpha
# The quick way
cor.test(X, Y)