Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 86 additions & 42 deletions climate_toolbox/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,69 +3,113 @@
from climate_toolbox.utils.utils import *


def standardize_climate_data(ds):
"""
Read climate data and standardize units to:
- lon and lat,
- lon to -180 to 180 and
def load_climate_data(data_type, file_path):
""" load_climate_data(data_type, file_path)
Read and prepare climate data

:param data_type: str
datatype to be read, supported types:
bcsd, gmfd, best, era5

Parameters
----------
ds: xr.Dataset
:param file_path: str
File path

Returns
-------
xr.Dataset
:return: ds: xr.Dataset
xarray dataset loaded in memory
"""

ds = rename_coords_to_lon_and_lat(ds)
ds = convert_lons_split(ds, lon_name='lon')
return _load_climate_data(
_find_loader(data_type),
file_path
)


def _load_climate_data(loader, file_path):
    """Open a dataset, load it into memory, and hand it to ``loader``.

    :param loader: callable
        Called with the loaded dataset; its return value is returned as-is.

    :param file_path: str
        Path passed to ``xr.open_dataset``.

    :return: whatever ``loader`` returns for the loaded dataset
    """
    with xr.open_dataset(file_path) as dataset:
        # Read the data eagerly so it remains usable after the file closes.
        dataset.load()

    prepared = loader(dataset)
    return prepared

return ds

def load_min_max_temperatures(data_type, file_path_tmin, file_path_tmax):
""" load_min_max_temperatures(data_type, file_path_tmin, file_path_tmax)

def load_bcsd(fp, varname, lon_name='lon', broadcast_dims=('time',)):
:param data_type: str
datatype to be read, supported types:
bcsd, gmfd, best, era5

:param file_path_tmin: path for min temperature
:param file_path_tmax: path for max temperature
:return:
ds_tasmax: xr.Dataset, ds_tasmin: xr.Dataset
"""
Read and prepare climate data

After reading data, this method also fills NA values using linear
interpolation, and standardizes longitude to -180:180
return _load_min_max_temperatures(
_find_loader(data_type),
file_path_tmin,
file_path_tmax
)

Parameters
----------
fp: str
File path or dataset

varname: str
Variable name to be read
def _load_min_max_temperatures(loader, file_path_tmin, file_path_tmax):
with xr.open_dataset(file_path_tmin) as ds_tasmin:
ds_tasmin.load()
with xr.open_dataset(file_path_tmax) as ds_tasmax:
ds_tasmax.load()

lon_name : str, optional
Name of the longitude dimension (default selects from ['lon' or
'longitude'])
return loader(ds_tasmin), loader(ds_tasmax)

Returns
-------
xr.Dataset
xarray dataset loaded into memory
"""

if lon_name is not None:
lon_names = [lon_name]
def _find_loader(data_type):
""" Helper function to find climate data loader """

if hasattr(fp, 'sel_points'):
ds = fp
data_type = data_type.lower()

if 'bcsd' in data_type:
loader = load_bcsd
elif 'gmfd' in data_type:
loader = load_gmfd
elif 'best' in data_type:
loader = load_best
elif 'era' in data_type:
loader = load_era5
else:
with xr.open_dataset(fp) as ds:
ds.load()
raise TypeError("'" + data_type + "' not supported. Supported data "
"types are: NASA BCSD, GMFD, BEST, ERA5.")
return loader
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is great! but can we force the user to provide the exact loader name? ERAI and ERA5 have different formats, and GMFD-v1 and GMFD-v3 do too :)

Copy link
Contributor Author

@atrisovic atrisovic Sep 19, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My first implementation had users pass the loader function, but that would mean a user needs to import both the loader function and load_climate_data (from climate_toolbox.io.io import load_climate_data, load_bcsd) to be able to pass it on. But yeah, I can revert to that version; it's useful if a user wants to write their own loader func. What do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh sorry I didn't say that quite right. I don't mean load_era, I mean something like ERA5 rather than ERA. Different versions have different formats.



return standardize_climate_data(ds)
def standardize_climate_data(ds):
""" standardize_climate_data(ds)
Standardize climate data coordinates:
- coordinate names to lon and lat, and
- lon values to the range -180 to 180

:param ds: xr.Dataset
:return: ds: xr.Dataset
"""

ds = rename_coords_to_lon_and_lat(ds)
ds = convert_lons_split(ds, lon_name='lon')

def load_gmfd(fp, varname, lon_name='lon', broadcast_dims=('time',)):
pass
return ds


def load_bcsd(ds):
    """Return the given dataset unchanged.

    :param ds: dataset to prepare
    :return: the same ``ds`` object, with no modification applied
    """
    return ds


def load_best(fp, varname, lon_name='lon', broadcast_dims=('time',)):
def load_gmfd(ds):
    """Prepare a dataset by standardizing its coordinates.

    If the dataset has a ``tmin`` or ``tmax`` data variable, the result of
    ``standardize_climate_data(ds)`` is returned. Otherwise the coordinates
    are renamed with ``rename_coords_to_lon_and_lat`` when either ``lat`` or
    ``lon`` is missing, and the result of
    ``convert_lons_split(ds, lon_name="lon")`` is returned.

    :param ds: dataset exposing ``data_vars`` and ``coords``
    :return: the dataset returned by the standardization helpers
    """
    has_temperature_var = any(
        name in ds.data_vars for name in ('tmin', 'tmax')
    )
    if has_temperature_var:
        return standardize_climate_data(ds)

    coords_already_named = 'lat' in ds.coords and 'lon' in ds.coords
    if not coords_already_named:
        ds = rename_coords_to_lon_and_lat(ds)

    return convert_lons_split(ds, lon_name="lon")


def load_best(ds=None):
    """Placeholder loader for BEST data; not implemented yet.

    Loaders are invoked as ``loader(ds)``, so this stub accepts the dataset
    as an optional argument instead of raising ``TypeError`` on that call.
    Calling it with no arguments still works.

    :param ds: dataset to prepare; currently ignored
    :return: None
    """
    # TODO: implement BEST-specific preparation; the stub returns nothing.
    return None


def load_era5(ds=None):
    """Placeholder loader for ERA5 data; not implemented yet.

    Loaders are invoked as ``loader(ds)``, so this stub accepts the dataset
    as an optional argument instead of raising ``TypeError`` on that call.
    Calling it with no arguments still works.

    :param ds: dataset to prepare; currently ignored
    :return: None
    """
    # TODO: implement ERA5-specific preparation; the stub returns nothing.
    return None