-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.R
More file actions
54 lines (41 loc) · 1.28 KB
/
analysis.R
File metadata and controls
54 lines (41 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
library(car)
library(glm)
library(AER)
library(vcd)
library(DataExplorer)
library(lmtest)
library(knitr)
library()
# loads datasets
source('./queries.R')
# cleans datasets
source('./cleaning.R')
acs_clean <- clean_acs_data(df = acs_data)
prescription_clean <- clean_prescription_data(df = prescriptions)
overdoses_clean <- clean_overdose_data(df = overdoses)
# join cleaned datasets
opioid <- acs_clean %>%
inner_join(prescription_clean,
by = 'geoid') %>%
inner_join(overdoses_clean,
by = 'geoid') %>%
filter(crude_death_rate != 0) %>%
select(-c(geoid, name, deaths, population))
png("test.png", height = 50*nrow(head(opioid)), width = 200*ncol(head(opioid)))
grid.table(head(opioid))
dev.off()
# distribution of variables
DataExplorer::plot_histogram(opioid)
# base model
base_lm <- lm(crude_death_rate ~ ., data = opioid)
summary(base_lm)
# check for multicollinearity
car::vif(base_lm)
# test for heteroskedasticity
lmtest::bptest(base_lm)
# fitted vs residuals
plot(base_lm$fitted.values, base_lm$residuals,
xlab = 'Fitted Values', ylab = 'Residuals',
main = 'Base Linear Model')
# regression with heteroskedastic robust errors
coeftest(base_lm, vcov = vcovHC(base_lm, type = "HC0"))