Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 236 additions & 0 deletions KNMI/PrecipitationRadar/ImportPrecipitationRadarData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
# ImportPrecipitationRadarData.R
#
# The following R script is used to create key-value tables with columns
# "Time", "X", "Y", "Precipitation"
# from KNMI provided NetCDF files
# http://adaguc.knmi.nl/contents/datasets/productdescriptions/W_ADAGUC_Product_description_RADNL_OPER_R___25PCPRR_L3.html
#
# This program assumes the NetCDF files already to be downloaded (for instance from
# http://opendap.knmi.nl/knmi/thredds/catalog/radarprecipclim/RAD_NL25_RAC_MFBS_5min_NC/catalog.html
# ) and decompressed into the directory pointed to by the environment variable
# "ProjectData" pointing to a share (part of a path at least), and from there in
# directories "KNMI", "PrecipitationRadar", and "NetCDF", for instance ProjectData equals
# "/Volumes/ProjectData", combined it would make /Volumes/ProjectData/KNMI/PrecipitationRadar/NetCDF/
# If the environment variable is not set, the data is assumed to be decompressed into
# the path ProjectData/KNMI/PrecipitationRadar/ relative to where the program starts.
# The environment variable also controls the output location. If not set
# the path is KNMI/PrecipitationRadar/RData/ relative to where the program starts.
# Currently, the zip files offered by KNMI are organized per year and month (and [ignored] hour).
# This structure is assumed by the program: the program first determines a list of all
# directories one level up from "NetCDF", and iterates over each of them.
# Assuming these directories represent data for separate years, the program first checks
# for the presence of a output dataset associated with that year. If it finds one, it
# assumes that year is already processed and skips to the next directory (more about this below).
# If the program assumes the year is not yet processed, it lists all subdirectories one
# level up which it assumes to represent months of the year. In principle, the program
# processes each month in turn, saves the monthly data in a separate file and concatenates
# all per-month data and saves the annual aggregate data as well. If the program finds a
# per-month data file, it loads the data just for concatenation, so in the end, the complete
# aggregate annual data can be saved. This way it is relatively easy to add extra data once
# new data becomes available (could be done more efficiently though).
# If data needs to be replaced, either the output datasets can be deleted, or the function
# check.file.exists below can be changed to return FALSE
#
# Currently the program uses the Parallel package to load the monthly NetCDF files in
# parallel using the forking mechanism, so this might not work on windows machines.

# It is unclear to me whether the time used in the file name is the beginning of the
# 5-minute interval or the end. May not matter much, but anyway.

# Predicate deciding whether an already-existing output file should be kept.
# It simply wraps file.exists(), but wrapping it in a named function makes it
# trivial to force regeneration of all data: redefine it to return FALSE
# (see the commented-out alternative below).
check.file.exists <- function(thePath) {
  file.exists(thePath)
}

# uncomment the next line to replace all data
# check.file.exists <- function(thePath) FALSE


#
# load required packages
#
# library() is used instead of require(): require() only returns FALSE when a
# package is missing, so the script would continue and fail later at first
# use; library() stops immediately with a clear error.
suppressPackageStartupMessages(library(dplyr))    # bind_rows, filter, mutate, select
suppressPackageStartupMessages(library(stringr))  # str_extract_all
suppressPackageStartupMessages(library(tidyr))    # gather, starts_with
suppressPackageStartupMessages(library(ncdf4))    # nc_open, ncvar_get, ncatt_get
suppressPackageStartupMessages(library(parallel)) # detectCores, makeForkCluster, parLapply


# The "ProjectData" environment variable determines the input and output
# locations. An empty string means the variable is unset: input is then read
# from the relative directory "ProjectData" while output is written relative
# to the working directory (this asymmetry is intentional, see the header).
# The variable is read once and cached instead of calling Sys.getenv() three
# times; a plain if/else replaces the earlier scalar ifelse(), which is meant
# for vectors and obscured the intent.

projectData <- Sys.getenv("ProjectData")

if (projectData == "") {
  outputDirectory <- file.path("KNMI", "PrecipitationRadar", "RData")
  inputBase <- "ProjectData"
} else {
  outputDirectory <- file.path(projectData,
                               "KNMI",
                               "PrecipitationRadar",
                               "RData")
  inputBase <- projectData
}

#
# list the year-directories (one sub-directory per year below NetCDF/)
#
theDirs <- list.dirs(path = file.path(inputBase,
                                      "KNMI",
                                      "PrecipitationRadar",
                                      "NetCDF"),
                     full.names = TRUE,
                     recursive = FALSE)

# debugging code
# theDir <- theDirs[[1]]
# theDir

# iterate over the years: each directory below NetCDF/ is assumed to hold the
# data of one year, with one sub-directory per month below it

for (theDir in theDirs) {

# Path to the annual output file for this year-directory

pathToYearFile <- file.path(outputDirectory,
paste("PrecipitationRadarData-", basename(theDir), ".Rdata", sep = ""))

# if the annual file already exists, this year is considered fully processed
# and is skipped (redefine check.file.exists above to force regeneration)

if (check.file.exists(pathToYearFile))
next ;

# list the month directories of this year

theMonthDirs <- list.dirs(path = theDir,
full.names = TRUE,
recursive = FALSE)

# debugging code
# theMonthDir <- theMonthDirs[[1]]
# theMonthDir


# create annual RadarData by concatenating all monthly data frames

RadarData <- bind_rows(
lapply(
theMonthDirs,
function(theMonthDir) {

# Path to the monthly output file

pathToMonthFile <- file.path(outputDirectory,
paste("PrecipitationRadarData-", basename(theDir), "-", basename(theMonthDir), ".Rdata", sep = ""))

# if the monthly file exists (but not the annual aggregate, which still has
# to be made), load it; load() restores the object under its saved name,
# RadarDataMonth, which is returned at the end of this function

if (file.exists(pathToMonthFile))
load(pathToMonthFile)
else {

# the monthly file does not exist yet, so create it;
# first report progress (start time and month directory)

print(paste(date(), theMonthDir))

# list all NetCDF files (assumed *.nc) below the month directory,
# recursively, so per-day sub-directories are included

theFiles <- list.files(path = theMonthDir,
pattern = glob2rx("*.nc"),
full.names = TRUE,
recursive = TRUE,
no.. = TRUE)

# debugging code
# theFile <- theFiles[[1]]

# one fork-based worker per core; forking is unavailable on Windows
# NOTE(review): consider on.exit(stopCluster(my.cluster), add = TRUE) so the
# cluster is also released when an error occurs mid-month — TODO confirm

nCores <- detectCores()
my.cluster <- makeForkCluster(nCores)

# each NetCDF file (one 5-minute radar image) becomes one data frame;
# bind_rows() stacks them into the monthly data set

RadarDataMonth <- bind_rows(
parLapply(
my.cluster,
theFiles,
function(theFile) {

# the timestamp is encoded in the file name as "_YYYYMMDDHHMM." and is
# interpreted as UTC

timepoint <- as.POSIXct(str_extract_all(theFile, "_[[:digit:]]{8}[[:digit:]]{4}\\.")[[1]],
format = "_%Y%m%d%H%M.",
tz = "UTC")

# code borrowed from Andrea's "getMetaDataNetCDF": read the fill value,
# scale factor and offset needed to decode the stored integers

fileH <- nc_open(theFile)

variableOfInterest <- "image1_image_data"
missingValue <- ncatt_get(fileH, variableOfInterest, "_FillValue")
missingValue <- missingValue$value
scaleFactor <- ncatt_get(fileH, variableOfInterest, "scale_factor")
scaleFactor <- scaleFactor$value
addOffsetFactor <- ncatt_get(fileH, variableOfInterest, "add_offset")
addOffsetFactor <- addOffsetFactor$value

x <- ncvar_get(fileH, "x")
y <- ncvar_get(fileH, "y")
image_data <- ncvar_get(fileH, variableOfInterest)
#
nc_close(fileH)

# we obtained a matrix of integers, mostly containing NA's;
# note that this matrix's first dimension is x, and the second is y,
# so in terms of i and j, the y values are columns (j)

# the next procedure is as follows:
# first image_data is converted into a data frame;
# this data.frame has columns V1 to Vn with n the number of columns (the Y
# dimension) and row.names 1:m with m the number of rows (the X dimension);
# we create the data.frame,
# set the X value to the numeric value of the row numbers and
# set the Start value equal to the time point
#
dataFrame <- as.data.frame(image_data)
dataFrame$X <- as.integer(rownames(dataFrame))
dataFrame$Start <- timepoint

# next we gather (stack) all columns starting with "V" into key/value pairs,
# keeping X and Start as identifying columns

thisFrameRadarData <- gather(dataFrame,
key = "yLabel", value = "Precipitation",
starts_with("V")) %>%

# keep only cells with actual precipitation: drop NA's, zero values and
# cells equal to the file's fill value (sparse representation of the grid)

filter(!is.na(Precipitation),
Precipitation > 0,
Precipitation != missingValue) %>%

# Compute Y as the numerical value of the column name without the "V"

mutate(Y = as.integer(substring(yLabel, 2)),

# decode the stored integer into the actual measurement

Precipitation = scaleFactor * Precipitation + addOffsetFactor) %>%

# Select the required data

select(Start, X, Y, Precipitation)

return(thisFrameRadarData)
}))

# Refresh the cluster (all leaked data, if any, gone)

stopCluster(my.cluster)

# save the monthly data under the name RadarDataMonth (the same name load()
# restores above)

save(RadarDataMonth,
file = pathToMonthFile)
}

# RadarDataMonth was either loaded or freshly computed at this point

return(RadarDataMonth)
}))

# save the annual data

save(RadarData,
file = pathToYearFile)

}

warnings()
sessionInfo()
proc.time()
107 changes: 90 additions & 17 deletions generalWrangleRadar.R
Original file line number Diff line number Diff line change
@@ -1,35 +1,54 @@
library(ncdf4)
library(data.table)
library(sp)
library(raster)
#library(sp)
#library(raster)
library(proj4)

# SOURCE contains the projection string of the external data
# this is currently the Dutch grid.
# SOURCE should eventually be a parameter, if we want international use
# For now, we keep it as a global parameter

#De Bilt 140844, 457994
SOURCE <- "+proj=sterea +lat_0=52.15616055555555 +lon_0=5.38763888888889 +k=0.9999079 +x_0=155000 +y_0=463000 +ellps=bessel +towgs84=565.4171,50.3319,465.5524,-0.398957388243134,0.343987817378283,-1.87740163998045,4.0725 +units=m +no_defs"

pointX<-140844
pointY<-457994


convertToPolar<-function(pointX, pointY){
# projection string in the polar stereographic projection
# see also http://adaguc.knmi.nl/contents/datasets/productdescriptions/W_ADAGUC_Product_description_RADNL_OPER_R___25PCPRR_L3.html
TARGET <- "+proj=stere +lat_0=90 +lon_0=0.0 +lat_ts=60.0 +a=6378.137 +b=6356.752 +x_0=0 +y_0=0"


RDH<- "+proj=sterea +lat_0=52.15616055555555 +lon_0=5.38763888888889 +k=0.9999079 +x_0=155000 +y_0=463000 +ellps=bessel +towgs84=565.4171,50.3319,465.5524,-0.398957388243134,0.343987817378283,-1.87740163998045,4.0725 +units=m +no_defs"
# for development, we use a typical NetCDF file, containing radar data

pointDF<-data.frame(pointX,pointY)
exampleNetCDFfile <- "./KNMI/PrecipitationRadar/KNMI-Data/RAD_NL25_RAC_MFBS_5min_201501312355.nc"
filepath <- exampleNetCDFfile

coordinates(pointDF)<-~pointX+pointY
crs(pointDF)<-RDH

polar<-"+proj=stere +lat_0=90 +lon_0=0.0 +lat_ts=60.0 +a=6378.137 +b=6356.752 +x_0=0 +y_0=0"
# the location of the KNMI in the Dutch grid
#De Bilt 140844, 457994

polars<-ptransform(cbind(pointDF$pointX,pointDF$pointY),RDH,polar, silent = F)
pointX<-140844
pointY<-457994

polars<-data.frame(polars)
colnames(polars)<-c("X","Y","Z")

#polar is a dataFrame
polars

# the convertToPolar function converts coordinates in the SOURCE projection to
# coordintates in the TARGET projection

# Convert coordinates given in the SOURCE projection (the Dutch grid) to the
# TARGET polar stereographic projection used by the KNMI radar product.
#
# pointX, pointY: coordinates in the SOURCE projection (scalars or vectors).
# Returns a data.frame with columns X, Y and Z (TARGET coordinates).
convertToPolar <- function(pointX, pointY) {

  # ptransform expects three columns, so a zero Z coordinate is appended
  coords <- cbind(pointX, pointY, 0)
  transformed <- ptransform(coords, SOURCE, TARGET, silent = FALSE)

  result <- data.frame(transformed)
  colnames(result) <- c("X", "Y", "Z")

  # the data.frame is the return value
  result
}


Expand All @@ -47,6 +66,7 @@ fileFull<-nc_open(filepath)
x<-ncvar_get(fileFull, "x")
y<-ncvar_get(fileFull, "y")
dataGrid<-ncvar_get(fileFull,variableOfInterest)
nc_close(fileFull)



Expand Down Expand Up @@ -76,7 +96,60 @@ getMetaDataNetCDF<-function(filepath, variableOfInterest = "image1_image_data"){
scaleFactor <- scaleFactor$value
addOffsetFactor <- ncatt_get(fileFull,variableOfInterest,"add_offset")
addOffsetFactor <- addOffsetFactor$value
nc_close(fileFull)

list(missingValue = missingValue, scaleFactor = scaleFactor, addOffset = addOffsetFactor, variable = variableOfInterest)
}


getMetaDataNetCDF(exampleNetCDFfile)


# makeCellfunction reads the grid definition (the x and y cell-centre
# coordinate vectors) from a NetCDF radar file and returns a closure that
# maps a point given in the SOURCE projection to the (column, row) cell of
# the radar image grid.
#
# filepath: path to a NetCDF file containing "x" and "y" coordinate variables.
# Returns:  function(xvar, yvar) -> list(X = column, Y = row, xvar, yvar)
makeCellfunction <- function(filepath) {

  # read the grid axes, then release the file handle immediately
  fileFull<-nc_open(filepath)
  x<-ncvar_get(fileFull, "x")
  y<-ncvar_get(fileFull, "y")
  nc_close(fileFull)

  cellSize <- x[[2]] - x[[1]] #constant size square grid is assumed in this projection

  if (cellSize != abs(y[[1]] - y[[2]])) stop("Grid is not square") # we may drop this test

  # note that the y-values can decrease as we go down from the top line (the
  # first line number is one), as we assume y is largest at the top of the
  # figure, so y[[1]] - y[[2]] > 0; in some files this may be the opposite,
  # which the branch below handles. x values increase as we go to the right.

  offsetGrid <- cellSize / 2 #x,y coordinates of the grid are cell mid points

  xmin <- min(x)
  ymin <- min(y)
  ymax <- max(y)

  function(xvar, yvar) {

    inPolarCoordinates <- convertToPolar(xvar, yvar)

    # inPolarCoordinates$X may be up to "offsetGrid" smaller than xmin and
    # still fall in the first column; analogously for Y and the first row

    columnX <- ceiling((inPolarCoordinates$X - xmin + offsetGrid) / cellSize) # getting to the NetCDF array cell from the coordinate (X)

    if (y[[1]] < y[[2]]) {
      # y increases with the row index: measure from the smallest y value.
      # (bug fix: the original used the untransformed yvar and the undefined
      # name ymin here, instead of the projected Y coordinate and min(y))
      rowY <- ceiling((inPolarCoordinates$Y - ymin + offsetGrid) / cellSize)
    } else {
      rowY <- ceiling((ymax - inPolarCoordinates$Y + offsetGrid) / cellSize) # and (Y)
    }
    list(X = columnX, Y = rowY, xvar = xvar, yvar = yvar)
  }
}

# build the coordinate-to-cell mapping from the example file's grid
transFormFunction <- makeCellfunction(exampleNetCDFfile)


# map the De Bilt point (pointX, pointY, Dutch grid) to its radar grid cell
transFormFunction(pointX, pointY)