BIOL 297: Schedule for week of May 11-15

Assignments:

  • Complete all labs by Tuesday, May 12 @ 11:59 PM HST

  • Finish your Final Project! R Script and short blog-style report are due by classtime (Thursday, May 14 @ 12 PM HST). See this page and the Laulima announcement for complete details on what to turn in.

Schedule

Thursday, May 14 - Presenations

Code demo (Correlation)

Watch video of code demo here.


# Demonstrate Pearson's Product Moment correlation test

# Load libraries
library(dplyr)
library(ggplot2)
library(scales)

# Mammals dataset from Lab 13 (Data/mammals.csv)

mammals <- read.csv("data/mammals.csv")
mammals$log_body_mass_kg <- log(mammals$body_mass_kg)
mammals$log_brain_mass_g <- log(mammals$brain_mass_g)

ggplot(mammals, aes(body_mass_kg, brain_mass_g)) +
  geom_point() +
  scale_x_log10(label = label_number(accuracy = 1)) +
  scale_y_log10(label = label_number(accuracy = 1)) +
  xlab("Body mass [kg] (log-scale)") +
  ylab("Brain mass [g] (log-scale)") +
  theme_bw()

# 1. State H_0 and H_A ----

# H_0: The population correlation coefficient (rho) between body mass and brain mass is exactly zero
rho_0 <- 0 # null hypothesis

# H_A: The population correlation coefficient (rho) between body mass and brain mass is not zero

# 2. Calculate a test statistic ----

X <- mammals$log_body_mass_kg # explanatory variable
Y <- mammals$log_brain_mass_g # response variable
n <- length(X)

Xbar <- mean(X)
Ybar <- mean(Y)


r <- sum((X - Xbar) * (Y - Ybar)) / (sqrt(sum((X - Xbar) ^ 2)) * sqrt(sum((Y - Ybar) ^ 2)))

SE_r <- sqrt((1 - r ^ 2) / (n - 2))

test_stat <- r / SE_r

# 3. Generate the null distribution

sampling_dist <- data.frame(Y = seq(-4, 27, 0.1))
sampling_dist$probability <- dt(sampling_dist$Y, df = n - 2)

head(sampling_dist)

sampling_dist_plot <- ggplot(sampling_dist, aes(Y, probability)) +
  geom_area(fill = "tomato", alpha = 0.5) +
  geom_line() +
  xlab(expression(italic(t))) +
  ylab("Probability density") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )

sampling_dist_plot

sampling_dist_plot +
  geom_vline(xintercept = test_stat, size = 2)

head(sampling_dist)

# 4. Find a critical value at specified alpha ----

alpha <- 0.05

critical_value <- qt(
  p = alpha / 2,
  df = n - 2,
  lower.tail = FALSE
)

critical_value

sampling_dist_plot + 
  geom_vline(xintercept = critical_value * c(-1, 1), size = 2)

# 5. Find the P-value

P_value <- 2 * pt(
  q = abs(test_stat),
  df = n - 2,
  lower.tail = FALSE
)

P_value

# Decide ----
P_value < alpha

# The quick way
cor.test(X, Y)

Related