-
Notifications
You must be signed in to change notification settings - Fork 0
Data Preparation 2015
Data from three testing periods were provided to the Center for Assessment in early June 2016. The main purpose of this data is to serve as the prior test scores (i.e. independent variables) to be used in the Spring 2016 SGP analyses.
In the source code below, the data supplied by Pearson are comma seperated files for each state in the PARCC consortium. These files are
located in the states' relative folder Data/Base_Files. A custom function is used to read each file into R and simultaneously combined
into a single data table. The data is then cleaned up variable by variable to ensure that it conforms to data naming conventions used
in the SGP software package.
The actual R code script is available here.
################################################################################
### ###
### Create Initial PARCC LONG Data for Fall 2016 Analyses ###
### ###
################################################################################
### Load required packages
require(SGP)
require(data.table)
setwd("PARCC")
###
### Read in Spring 2015 & Fall 2016 Pearson base data
###
#### Set names based on Pearson file layout
parcc.var.names <-
c("AssessmentYear", "StateAbbreviation", "PARCCStudentIdentifier",
"GradeLevelWhenAssessed", "Period", "TestCode", "SummativeScoreRecordUUID",
"StudentTestUUID", "SummativeScaleScore", "IRTTheta", "SummativeCSEM",
"Filler", "TestFormat")
center.var.names <-
c("StudentGrowthPercentileComparedtoState",
"StudentGrowthPercentileComparedtoPARCC","SGPPreviousTestCodeState",
"SGPPreviousTestCodePARCC", "SGPUpperBoundState", "SGPLowerBoundState",
"SGPUpperBoundPARCC", "SGPLowerBoundPARCC")
all.var.names <- c(head(parcc.var.names,-1), center.var.names, "TestFormat")
#### Function to read in individual state files
read.parcc <- function(state, year, Fall=FALSE) {
tmp.name <- gsub(" ", "_", SGP:::getStateAbbreviation(state, type="state"))
if(tmp.name=="WASHINGTON_DC") tmp.name <- "Washington_DC"
tmp.files <- list.files(file.path(tmp.name, "Data/Base_Files"))
my.zip <- grep(year, tmp.files, value=TRUE)
my.file <- gsub(".zip", "", my.zip)
tmp.dir <- getwd()
setwd("~")
system(paste("unzip '", file.path(tmp.dir, tmp.name, "Data/Base_Files", my.zip), "'", sep=""))
TMP <- fread(my.file, sep=',', header=FALSE, skip=1L, colClasses=rep("character", 20))
unlink(my.file)
setwd(tmp.dir)
if (year=="2014-2015" | Fall==TRUE) {
setnames(TMP, head(all.var.names,-1))
} else setnames(TMP, all.var.names)
return(TMP)
}
#### Fall 2014 / Spring 2015
PARCC_Data_LONG_2015 <- rbindlist(list(
read.parcc("CO", "2014-2015"), read.parcc("IL", "2014-2015"),
read.parcc("MD", "2014-2015"), read.parcc("MA", "2014-2015"),
read.parcc("NJ", "2014-2015"), read.parcc("NM", "2014-2015"),
read.parcc("RI", "2014-2015"), read.parcc("DC", "2014-2015")))
setkey(PARCC_Data_LONG_2015, PARCCStudentIdentifier, TestCode, Period)
#### Merge in the TestFormat variable from 2015 data obtained from client states
PARCC_TEST_MODE <- as.data.table(read.csv("./Colorado/Data/Base_Files/PARCC_TEST_MODE.csv"))
PARCC_TEST_MODE[, SASID := NULL]
setnames(PARCC_TEST_MODE, "summativeScoreRecordUuid", "SummativeScoreRecordUUID")
setkey(PARCC_TEST_MODE, SummativeScoreRecordUUID)
setkey(PARCC_Data_LONG_2015, SummativeScoreRecordUUID)
PARCC_Data_LONG_2015 <- PARCC_TEST_MODE[PARCC_Data_LONG_2015]
setnames(PARCC_Data_LONG_2015, "PARCC_MODE", "TestFormat")
load("./Rhode_Island/Data/Rhode_Island_Data_LONG_2014_2015.Rdata")
RI.15 <- as.data.table(foreign::read.spss(
"./Rhode_Island/Data/Base_Files/2015 Summative File with Test Format.sav",
to.data.frame=TRUE, use.value.labels=FALSE))
RI.15 <- RI.15[,c("summativeScoreRecordUuid", "TestFormat"), with=FALSE]
setnames(RI.15, "summativeScoreRecordUuid", "SummativeScoreRecordUUID")
setkey(RI.15, SummativeScoreRecordUUID)
setkey(PARCC_Data_LONG_2015, SummativeScoreRecordUUID)
PARCC_Data_LONG_2015 <- RI.15[PARCC_Data_LONG_2015]
PARCC_Data_LONG_2015[which(!is.na(i.TestFormat)), TestFormat := i.TestFormat]
PARCC_Data_LONG_2015[, i.TestFormat := NULL]
PARCC_Data_LONG_2015 <- PARCC_Data_LONG_2015[, parcc.var.names, with=FALSE]
save(PARCC_Data_LONG_2015, file="./PARCC/Data/Base_Files/PARCC_Data_LONG_2015.Rdata")
#### Fall 2015
setwd("./PARCC")
PARCC_Data_LONG_2016 <- rbindlist(list(
read.parcc("IL", "2016_SGPO_D201605", Fall=TRUE),
read.parcc("MD", "2016_SGPO_D201605", Fall=TRUE),
read.parcc("NJ", "2016_SGPO_D201605", Fall=TRUE),
read.parcc("NM", "2016_SGPO_D201605", Fall=TRUE),
read.parcc("RI", "2016_SGPO_D201605", Fall=TRUE)))
setkey(PARCC_Data_LONG_2016, PARCCStudentIdentifier, TestCode, Period)
#### Create PARCC_Data_LONG
PARCC_Data_LONG <- rbindlist(list(
PARCC_Data_LONG_2015[, parcc.var.names, with=FALSE],
PARCC_Data_LONG_2016[, head(parcc.var.names, -1), with=FALSE]), fill = TRUE)
###
### Data Cleaning - Create Required SGP Variables
###
#### ID
setnames(PARCC_Data_LONG, "PARCCStudentIdentifier", "ID")
#### CONTENT_AREA from TestCode
PARCC_Data_LONG[, CONTENT_AREA := factor(TestCode)]
levels(PARCC_Data_LONG$CONTENT_AREA) <- c("ALGEBRA_I", "ALGEBRA_II", rep("ELA", 9),
"GEOMETRY", rep("MATHEMATICS", 6), "INTEGRATED_MATH_1", "INTEGRATED_MATH_2", "INTEGRATED_MATH_3")
PARCC_Data_LONG[, CONTENT_AREA := as.character(CONTENT_AREA)]
#### GRADE from TestCode
PARCC_Data_LONG[, GRADE := gsub("ELA|MAT", "", TestCode)]
PARCC_Data_LONG[, GRADE := as.character(as.numeric(GRADE))]
PARCC_Data_LONG[which(is.na(GRADE)), GRADE := "EOCT"]
PARCC_Data_LONG[, GRADE := as.character(GRADE)]
#### YEAR
PARCC_Data_LONG[, YEAR := gsub("-", "_", AssessmentYear)]
PARCC_Data_LONG[which(Period == "FallBlock"), YEAR := paste(YEAR, "1", sep=".")]
PARCC_Data_LONG[which(Period == "Spring"), YEAR := paste(YEAR, "2", sep=".")]
#### Valid Cases
PARCC_Data_LONG[, VALID_CASE := "VALID_CASE"]
#### Invalidate Cases with missing IDs - 0 invalid in FINAL data
PARCC_Data_LONG[which(is.na(ID)), VALID_CASE := "INVALID_CASE"]
#### Establish seperate Theta and Scale Score long data sets
PARCC_Data_LONG_SS <- copy(PARCC_Data_LONG)
PARCC_Data_LONG_SS[, c("IRTTheta", "Filler") := NULL]
PARCC_Data_LONG_SS[, CONTENT_AREA := paste(CONTENT_AREA, "SS", sep="_")]
setnames(PARCC_Data_LONG_SS,
c("SummativeScaleScore", "SummativeCSEM"),
c("SCALE_SCORE", "SCALE_SCORE_CSEM"))
#### Theta data set - create IRT CSEM First
scaling.constants <- as.data.table(read.csv("./PARCC/Data/Base_Files/2014-2015 PARCC Scaling Constants.csv"))
setkey(scaling.constants, CONTENT_AREA, GRADE)
setkey(PARCC_Data_LONG, CONTENT_AREA, GRADE)
PARCC_Data_LONG <- scaling.constants[PARCC_Data_LONG]
PARCC_Data_LONG[, SCALE_SCORE_CSEM := (as.numeric(SummativeCSEM))/a] # NO -b here...
PARCC_Data_LONG[, c("a", "b", "Filler") := NULL]
setnames(PARCC_Data_LONG, c("IRTTheta", "SummativeScaleScore", "SummativeCSEM"), c("SCALE_SCORE", "SCALE_SCORE_ACTUAL", "SCALE_SCORE_CSEM_ACTUAL"))
#### Stack Theta and SS Data
PARCC_Data_LONG <- rbindlist(list(PARCC_Data_LONG, PARCC_Data_LONG_SS), fill=TRUE)
#### Save Initial LONG Data
PARCC_Data_LONG[, GRADE := as.character(GRADE)]
PARCC_Data_LONG[, SCALE_SCORE := as.numeric(SCALE_SCORE)]
PARCC_Data_LONG[, SCALE_SCORE_CSEM := as.numeric(SCALE_SCORE_CSEM)]
PARCC_Data_LONG[, SCALE_SCORE_ACTUAL := as.numeric(SCALE_SCORE_ACTUAL)]
PARCC_Data_LONG[, SCALE_SCORE_CSEM_ACTUAL := as.numeric(SCALE_SCORE_CSEM_ACTUAL)]
save(PARCC_Data_LONG, file = "./PARCC/Data/PARCC_Data_LONG.Rdata")
##### Create SQLite Databases for each year / period
require(RSQLite)
parcc.db <- "./PARCC/Data/PARCC_Data_LONG.sqlite"
dbWriteTable(dbConnect(SQLite(), dbname = parcc.db), name = "PARCC_Data_LONG_2015_1",
value=PARCC_Data_LONG[YEAR == "2014_2015.1"], overwrite=TRUE)
dbWriteTable(dbConnect(SQLite(), dbname = parcc.db), name = "PARCC_Data_LONG_2015_2",
value=PARCC_Data_LONG[YEAR == "2014_2015.2"], overwrite=TRUE)
dbWriteTable(dbConnect(SQLite(), dbname = parcc.db), name = "PARCC_Data_LONG_2016_1",
value=PARCC_Data_LONG[YEAR == "2015_2016.1"], overwrite=TRUE)SGP - Student Growth Percentiles SGP Blog | SGP GitHub Repo | SGP on CRAN