Tag 5 Korrelation und Regression
5.1 Lösung Tag 4
siehe Tag 4.
5.2 Korrelation und Regression
Datenquelle: https://figshare.com/articles/Storks_and_human_babies_data/839299/1 oder https://github.com/jannikbuhr/dataIntro19/tree/master/data
library(tidyverse)
library(broom)
# Read the storks data set from the local data folder and print it.
storks <- read_csv("data/05_storks.csv")
storks

# Scatter plot on the original scale: Storks on x, Birth on y.
ggplot(storks, aes(Storks, Birth)) +
  geom_point()

# explain log transform and relevance in biology
# Same scatter plot with both axes log10-transformed and log tick marks added.
ggplot(storks, aes(Storks, Birth)) +
  geom_point() +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  annotation_logticks() +
  theme_classic()
# Linear model of Birth on Storks, both on their original scale.
model <- lm(Birth ~ Storks, data = storks)
model
summary(model)

# Log-log scatter plot with explicit log breaks and a linear trend line.
ggplot(storks, aes(Storks, Birth)) +
  geom_point() +
  scale_x_continuous(trans = "log10", breaks = scales::log_breaks()) +
  scale_y_continuous(trans = "log10", breaks = scales::log_breaks()) +
  annotation_logticks() +
  theme_classic() +
  geom_smooth(method = "lm")

# Refit on the log-transformed variables; this overwrites `model`.
model <- lm(log(Birth) ~ log(Storks), data = storks)
model
summary(model)
# explain correlation
# explain range of correlation coefficient
# Pearson correlation on the raw and on the log-transformed values.
cor(storks$Storks, storks$Birth)
cor(log(storks$Storks), log(storks$Birth))
# Significance test for the correlation on the raw values.
cor.test(storks$Storks, storks$Birth)
# Rank-based alternative.
cor(storks$Storks, storks$Birth, method = "spearman")

# Refit the linear model using the columns directly; overwrites `model`.
model <- lm(storks$Birth ~ storks$Storks)
summary(model)
# Squared correlation coefficient, printed right after the model summary
# so the two outputs can be compared.
cor(storks$Storks, storks$Birth)^2

# Base-graphics scatter plot with the fitted regression line.
with(storks, {
  plot(Storks, Birth)
  abline(lm(Birth ~ Storks))
})

# The same plot on the log scale.
with(storks, {
  plot(log(Storks), log(Birth))
  abline(lm(log(Birth) ~ log(Storks)))
})

# show xkcd
# talk about confounding factors / variables
storks
5.3 Tricks für viele Daten / Dateien
# Collect the full paths of all CSV files in the data folder.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the file name; a bare ".csv" would also match
# names such as "notcsv.txt" or "table.csv.bak".
paths <- dir("data", full.names = TRUE, pattern = "\\.csv$")
# Read every file; the result is a list with one data frame per path.
map(paths, read_csv)
# map_dfr
5.4 Non-Linear Regression
# Convert the built-in Puromycin data set to a tibble and inspect it.
Puromycin <- as_tibble(Puromycin)
Puromycin
# Number of observations per level of `state`.
Puromycin %>% count(state)

# Keep only the rows with state "treated".
puro_treat <- Puromycin %>%
  filter(state == "treated")

# Rate against concentration, once with base graphics and once with ggplot2.
plot(puro_treat$conc, puro_treat$rate)
ggplot(puro_treat, aes(conc, rate)) + geom_point()
#' Michaelis-Menten rate equation
#'
#' Computes `(Vm * conc) / (K + conc)`; vectorised over all arguments.
#'
#' @param conc Numeric vector of concentrations.
#' @param Vm Numeric; the value the rate approaches for large `conc`.
#' @param K Numeric; the concentration at which the rate equals `Vm / 2`.
#' @return Numeric vector of rates.
michaelis_menten_fun <- function(conc, Vm, K) {
  (Vm * conc) / (K + conc)
}
# Evaluate the Michaelis-Menten function on a fine grid and plot the curve.
x <- seq(0, 1, by = 0.001)
y <- michaelis_menten_fun(x, Vm = 200, K = 0.2)
plot(x, y)

# Overlay a curve with hand-picked parameters on the treated data.
plot(puro_treat$conc, puro_treat$rate)
curve(
  michaelis_menten_fun(conc = x, Vm = 200, K = 0.1),
  add = TRUE,
  col = "red",
  from = 0, to = 1,
  n = 100
)
# Non-linear least-squares fit of the Michaelis-Menten function to the
# treated data, starting from Vm = 200 and K = 0.1.
model <- nls(
  rate ~ michaelis_menten_fun(conc, Vm, K),
  start = list(Vm = 200, K = 0.1),
  data = puro_treat
)

model
coef(model)
summary(model)$coefficients

# Pull the fitted parameters out of the coefficient vector
# (first element is Vm, second is K, matching the order in `start`).
Vm_est <- coef(model)[1]
K_est <- coef(model)[2]

# Data with the fitted curve, base graphics.
plot(puro_treat$conc, puro_treat$rate)
curve(
  michaelis_menten_fun(x, Vm = Vm_est, K = K_est),
  add = TRUE,
  col = "red"
)

# Data with the fitted curve, ggplot2.
ggplot(puro_treat, aes(conc, rate)) +
  geom_point() +
  stat_function(
    fun = ~ michaelis_menten_fun(conc = .x, Vm = Vm_est, K = K_est),
    col = "red"
  ) +
  theme_classic()
# Same kind of fit using the self-starting model SSmicmen(), so no `start`
# values are passed.
model2 <- nls(
  rate ~ SSmicmen(conc, Vm, K),
  data = puro_treat
)
model2
# Predicted rates for concentrations 1 to 10.
predict(model2, newdata = list(conc = 1:10))
# Nest the data by `state`: one row per state with its rows in a `data`
# list column.
nested_data <- Puromycin %>%
  group_nest(state)
nested_data
# Data frame stored for the first state.
nested_data$data[[1]]

# Fit one nls model per state with an inline lambda.
all_models <- nested_data %>%
  mutate(
    model = map(
      data,
      ~ nls(
        rate ~ michaelis_menten_fun(conc, Vm, K),
        start = list(Vm = 200, K = 0.1),
        data = .x
      )
    )
  )
all_models
#' Fit the Michaelis-Menten model to one data frame
#'
#' @param df Data frame with the columns `rate` and `conc`.
#' @return The `nls` fit, started from Vm = 200 and K = 0.1.
fit_my_model <- function(df) {
  nls(
    rate ~ michaelis_menten_fun(conc, Vm, K),
    start = list(Vm = 200, K = 0.1),
    data = df
  )
}

# Try the helper on the treated subset.
fit_my_model(puro_treat)
# Fit one model per state using the named helper.
all_models <- nested_data %>%
  mutate(model = map(data, fit_my_model))
all_models
# Model stored for the first state.
all_models$model[[1]]

# Add the broom::augment() output and the coefficient vector of every model
# as list columns.
all_models <- all_models %>%
  mutate(
    fitted = map(model, augment),
    coef = map(model, coef)
  )
all_models

# Spread the coefficients into one column per parameter.
all_models %>%
  unnest_wider(coef)

# Long table of the augmented data for all states.
all_models %>%
  select(state, fitted) %>%
  unnest(fitted)

# Observed points and fitted values per state; the line only connects the
# concentrations that were actually measured.
all_models %>%
  select(state, fitted) %>%
  unnest(fitted) %>%
  ggplot(aes(conc, rate, color = state)) +
  geom_point() +
  geom_line(aes(y = .fitted))
#' Predict a fitted model on an evenly spaced concentration grid
#'
#' @param model Fitted model whose `predict()` method accepts a `conc` column.
#' @param from,to Lower and upper end of the concentration grid.
#' @param n Number of grid points.
#' @return A tibble with the columns `conc` and `.fitted`.
make_data <- function(model, from, to, n) {
  conc <- seq(from, to, length.out = n)
  .fitted <- predict(model, newdata = list(conc = conc))
  tibble(conc, .fitted)
}
# Predictions for every state on a grid of 100 concentrations from 0 to 1.
new_data <- all_models %>%
  mutate(new_data = map(model, make_data, 0, 1, 100)) %>%
  select(state, new_data) %>%
  unnest(new_data)

# Observed points per state, with the curves drawn from the grid
# predictions instead of the measured concentrations.
all_models %>%
  select(state, fitted) %>%
  unnest(fitted) %>%
  ggplot(aes(conc, rate, color = state)) +
  geom_point() +
  geom_line(data = new_data, aes(y = .fitted))
5.5 Übung
5.5.1 Mit den Datasaurus Dozen Datensets
# Data set for the exercise, taken from the datasauRus package.
datasauRus::datasaurus_dozen
- Visualisiere alle Sets gemeinsam in einem ggplot-Scatterplot, nutze dazu `facet_wrap`.
- Füge mittels `geom_smooth` lineare Trendlinien hinzu.
- Fitte eine lineare Regression an jedes der Datensets. Nutze dazu die Techniken aus R4DataScience: Many Models und das `broom`-Package.
- Analysiere die Fits.