Skip to content

CI for predicted probabilities #117

@MarieAgosta

Description

@MarieAgosta

Hi all,

So we manually did the predicted probabilities and now have to estimate the confidence intervals for our predicted probabilities… but the predict argument does not work for us.

We’ve searched around for ways to go about it and have yet to find anything that works. Do you have any suggestions? Below is the code, the bolded bit at the end is the new trouble spot.

Many thanks,

Marie

#manually collecting probabilities

####Black Millennial Woman
#1-probability for below-high-school educated black employed woman who didn't vote in 2008
prob_genY_ed1 <- (exp(-0.3448 - 0.3448 + 0.1249 + 0.6389))/(1+exp(-0.3448 - 0.3448 + 0.1249 + 0.6389)) 
#2-probability for high-school educated black employed woman who didn't vote in 2008
prob_genY_ed2 <- (exp(-0.3448 + 0.5263 + 0.1249 + 0.6389))/(1+exp(-0.3448 + 0.5263 + 0.1249 + 0.6389)) 
#3-probability for some post-high-school educated black employed woman who didn't vote in 2008
prob_genY_ed3 <- (exp(-0.3448 + 0.7955 + 0.1249 + 0.6389))/(1+exp(-0.3448 + 0.7955 + 0.1249 + 0.6389)) 
#4-probability for bachelor-educated black employed woman who didn't vote in 2008
prob_genY_ed4 <- (exp(-0.3448 + 0.8990 + 0.1249 + 0.6389))/(1+exp(-0.3448 + 0.8990 + 0.1249 + 0.6389)) 
#5-probability for graduate-educated black employed woman who didn't vote in 2008
prob_genY_ed5 <- (exp(-0.3448 + 1.5099 + 0.1249 + 0.6389))/(1+exp(-0.3448 + 1.5099 + 0.1249 + 0.6389)) 


####Black Baby Boomer Woman
#1-probability for below-high-school educated black employed woman who didn't vote in 2008
prob_boomer_ed1 <- (exp(-0.87481 - 0.87481 + 0.22553 -0.76681))/(1+exp(-0.87481 - 0.87481 + 0.22553 -0.7668)) 
#2-probability for high-school educated black employed woman who didn't vote in 2008
prob_boomer_ed2 <- (exp(-0.87481 + 0.81340 + 0.22553 -0.76681))/(1+exp(-0.87481 + 0.81340 + 0.22553 -0.76681)) 
#3-probability for some post-high-school educated black employed woman who didn't vote in 2008
prob_boomer_ed3 <- (exp(-0.87481 + 1.12412 + 0.22553 -0.76681))/(1+exp(-0.87481 + 1.12412 + 0.22553 -0.76681)) 
#4-probability for bachelor-educated black employed woman who didn't vote in 2008
prob_boomer_ed4 <- (exp(-0.87481 + 1.69306 + 0.22553 -0.76681))/(1+exp(-0.87481 + 1.69306 + 0.22553 -0.76681)) 
#5-probability for graduate-educated black employed woman who didn't vote in 2008
prob_boomer_ed5 <- (exp(-0.87481 + 1.54687 + 0.22553 -0.76681))/(1+exp(-0.87481 + 1.54687 + 0.22553 -0.76681)) 

#creating a dataframe of predicted variables for plotting and other uses
blackfem_genY_ed <- c(prob_genY_ed1, prob_genY_ed2, prob_genY_ed3, prob_genY_ed4, prob_genY_ed5)
blackfem_boomer_ed <- c(prob_boomer_ed1, prob_boomer_ed2, prob_boomer_ed3, prob_boomer_ed4, prob_boomer_ed5)
education <- cbind(blackfem_genY_ed, blackfem_boomer_ed)

#creating a data frame of fitted values for a hypothetical black woman

blackfem_genY <- with(anes_genY, data.frame(female = 1, race = 2, vote_2008 = 0, unemployed = 0, education = factor(1:5))) 

#this is an alternative we've played around with that includes the weighting columns
#used by the survey package as well
blackfem_genY <- with(anes_genY, data.frame(caseID = factor(1:5), weights = .506, psu = 1, strata = 6, female = 1, race = 2, vote_2008 = 0, unemployed = 0, education = factor(1:5))) 

#predict probability point estimates for each fitted value.

blackfem_genY$predicted_prob <- prediction(M_genY, data = blackfem_genY, type = "response")

blackfem_genY$predicted_prob <- predict(M_genY, newdata = blackfem_genY, type = "response", se = TRUE)
#another alternative
blackfem_genY$predicted_prob <- predict(M_genY, newdata = blackfem_genY, type = "response", 
                                        design = ANESdesign_genY)

#Now to plot this with confidence intervals.
#First, add the confidence intervals

blackfem_genY <- cbind(blackfem_genY, predict(M_genY, newdata = blackfem_genY, type="link", se=TRUE))
blackfem_genY <- within(blackfem_genY, {
  prob <- plogis(fit)
  LL <- plogis(fit - (1.96 * se.fit))
  UL <- plogis(fit + (1.96 * se.fit))
})

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions