diff --git a/atlite/convert.py b/atlite/convert.py
index 96a2974e..5a44387c 100644
--- a/atlite/convert.py
+++ b/atlite/convert.py
@@ -565,14 +565,21 @@ def convert_wind(
     ds: xr.Dataset,
     turbine: TurbineConfig,
     interpolation_method: Literal["logarithmic", "power"],
+    windspeed_bias_correction: xr.DataArray | None,
 ) -> xr.DataArray:
     """
     Convert wind speeds for turbine to wind energy generation.
     """
     V, POW, hub_height, P = itemgetter("V", "POW", "hub_height", "P")(turbine)
 
+    from_height = None
+    if windspeed_bias_correction is not None:
+        ds, from_height = windm.apply_windspeed_bias_correction(
+            ds, windspeed_bias_correction
+        )
+
     wnd_hub = windm.extrapolate_wind_speed(
-        ds, to_height=hub_height, method=interpolation_method
+        ds, to_height=hub_height, method=interpolation_method, from_height=from_height
     )
 
     def apply_power_curve(da):
@@ -598,6 +605,7 @@ def wind(
     smooth: bool | dict = False,
     add_cutout_windspeed: bool = False,
     interpolation_method: Literal["logarithmic", "power"] = "logarithmic",
+    windspeed_bias_correction: bool | xr.DataArray | None = None,
     **params,
 ) -> xr.DataArray:
     """
@@ -609,17 +617,20 @@
     Parameters
     ----------
     turbine : str or dict
-        A turbineconfig dictionary with the keys 'hub_height' for the
-        hub height and 'V', 'POW' defining the power curve.
-        Alternatively a str refering to a local or remote turbine configuration
-        as accepted by atlite.resource.get_windturbineconfig(). Locally stored turbine
-        configurations can also be modified with this function. E.g. to setup a different hub
-        height from the one used in the yaml file,one would write
-        "turbine=get_windturbineconfig(“NREL_ReferenceTurbine_5MW_offshore”)|{“hub_height”:120}"
+        A turbineconfig dictionary with the keys 'hub_height' for the hub height
+        and 'V', 'POW' defining the power curve. Alternatively a str referring to
+        a local or remote turbine configuration as accepted by
+        atlite.resource.get_windturbineconfig(). Locally stored turbine
+        configurations can also be modified with this function. E.g. to set up a
+        different hub height from the one used in the yaml file, one would write
+        >>> turbine = (
+        ...     get_windturbineconfig("NREL_ReferenceTurbine_5MW_offshore")
+        ...     | {"hub_height": 120}
+        ... )
     smooth : bool or dict
-        If True smooth power curve with a gaussian kernel as
-        determined for the Danish wind fleet to Delta_v = 1.27 and
-        sigma = 2.29. A dict allows to tune these values.
+        If True, smooth the power curve with a Gaussian kernel as determined
+        for the Danish wind fleet to Delta_v = 1.27 and sigma = 2.29. A dict
+        allows tuning these values.
     add_cutout_windspeed : bool
         If True and in case the power curve does not end with a zero, will add zero power
         output at the highest wind speed in the power curve. If False, a warning will be
@@ -628,6 +639,14 @@
     interpolation_method : {"logarithmic", "power"}
         Law to interpolate wind speed to turbine hub height. Refer to
         :py:func:`atlite.wind.extrapolate_wind_speed`.
+    windspeed_bias_correction : bool or DataArray, optional
+        Correction factor that is applied to the wind speed at height
+        `.attrs["height"]`. Such a correction factor can be calculated using
+        :py:func:`atlite.wind.calculate_windspeed_bias_correction` with a raster
+        dataset of mean wind speeds.
+        If True, the scaling factor is taken from 'wnd_bias_correction' in
+        `cutout` (or a ValueError is raised if it is missing).
+        If None (the default), a scaling factor is applied if it exists in
+        `cutout`; if False, no correction is applied.
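+        An illustrative workflow (the GeoTIFF path is hypothetical):
+
+        >>> correction = atlite.wind.calculate_windspeed_bias_correction(
+        ...     cutout, "gwa3_250_windspeed_100m.tif"
+        ... )  # doctest: +SKIP
+        >>> gen = cutout.wind(turbine, windspeed_bias_correction=correction)  # doctest: +SKIP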
 
     Note
     ----
@@ -645,10 +664,29 @@ def wind(
     if smooth:
         turbine = windturbine_smooth(turbine, params=smooth)
 
+    if isinstance(windspeed_bias_correction, xr.DataArray):
+        # Front-load coordinate alignment cost
+        windspeed_bias_correction = windspeed_bias_correction.reindex_like(cutout.data)
+    elif windspeed_bias_correction is None:
+        windspeed_bias_correction = cutout.data.get("wnd_bias_correction")
+    elif windspeed_bias_correction is True:
+        try:
+            windspeed_bias_correction = cutout.data["wnd_bias_correction"]
+        except KeyError:
+            raise ValueError(
+                "Wind speed bias correction is required, but cutout does not contain "
+                "scaling factor: 'wnd_bias_correction'.\n"
+                "Regenerate the cutout or provide the scaling factors explicitly, i.e.\n"
+                "cutout.wind(..., windspeed_bias_correction=scaling_factors)"
+            ) from None
+    else:
+        windspeed_bias_correction = None
+
     return cutout.convert_and_aggregate(
         convert_func=convert_wind,
         turbine=turbine,
         interpolation_method=interpolation_method,
+        windspeed_bias_correction=windspeed_bias_correction,
         **params,
     )
diff --git a/atlite/cutout.py b/atlite/cutout.py
index 5dceec1f..6f9892b4 100644
--- a/atlite/cutout.py
+++ b/atlite/cutout.py
@@ -134,6 +134,12 @@ def __init__(self, path, **cutoutparams):
         gebco_path: str
             Path to find the gebco NetCDF file. Only necessary when including
             the gebco module.
+        windspeed_real_average_path: str, optional
+            Path to a raster dataset with wind speeds used to bias-correct
+            average wind speeds. If not given during ERA5 cutout creation, the
+            corresponding windspeed_bias_correction feature is skipped. It has
+            been tested to work well with the mean wind speeds at 100m from the
+            Global Wind Atlas at https://globalwindatlas.info/ .
         parallel : bool, default False
             Whether to open dataset in parallel mode. Take effect for all
             xr.open_mfdataset usages.
diff --git a/atlite/data.py b/atlite/data.py
index 5482fc2f..efbf616e 100644
--- a/atlite/data.py
+++ b/atlite/data.py
@@ -31,6 +31,7 @@ def get_features(
     tmpdir=None,
     monthly_requests=False,
     concurrent_requests=False,
+    **parameter_updates,
 ):
     """
     Load the feature data for a given module.
@@ -38,7 +39,7 @@
     This get the data for a set of features from a module. All modules in
     `atlite.datasets` are allowed.
     """
-    parameters = cutout.data.attrs
+    parameters = cutout.data.attrs | parameter_updates
     lock = SerializableLock()
     datasets = []
     get_data = datamodules[module].get_data
@@ -58,7 +59,7 @@
     datasets = compute(*datasets)
 
-    ds = xr.merge(datasets, compat="equals")
+    ds = xr.merge([da for da in datasets if da is not None], compat="equals")
     for v in ds:
         da = ds[v]
         da.attrs["module"] = module
@@ -140,6 +141,7 @@ def cutout_prepare(
     dask_kwargs=None,
     monthly_requests=False,
     concurrent_requests=False,
+    **parameter_updates,
 ):
     """
     Prepare all or a selection of features in a cutout.
@@ -185,6 +187,9 @@
     concurrent_requests : bool, optional
         If True, the monthly data requests are posted concurrently. Only has an
         effect if `monthly_requests` is True. The default is False.
+    **parameter_updates
+        Updates to the creation parameters; for the list of available
+        parameters, check the docstring of :py:meth:`atlite.Cutout.__init__`.
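+        For example, to add the windspeed bias correction to an existing
+        cutout (the path is illustrative):
+
+        >>> cutout.prepare(
+        ...     features=["windspeed_bias_correction"],
+        ...     windspeed_real_average_path="gwa3_250_windspeed_100m.tif",
+        ... )  # doctest: +SKIP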
 
     Returns
     -------
@@ -224,19 +229,27 @@
             data_format=data_format,
             monthly_requests=monthly_requests,
             concurrent_requests=concurrent_requests,
+            **parameter_updates,
+        )
+        new_features = set(da.attrs["feature"] for da in ds.data_vars.values())
+        if not new_features:
+            logger.warning("No new features prepared")
+            return
+        attrs = non_bool_dict(
+            cutout.data.attrs
+            | ds.attrs
+            | dict(prepared_features=list(prepared | new_features))
+            | parameter_updates
         )
-        prepared |= set(missing_features)
-
-        cutout.data.attrs.update(dict(prepared_features=list(prepared)))
-        attrs = non_bool_dict(cutout.data.attrs)
-        attrs.update(ds.attrs)
 
         # Add optional compression to the newly prepared features
        if compression:
-            for v in missing_vars:
-                ds[v].encoding.update(compression)
+            for da in ds.data_vars.values():
+                da.encoding.update(compression)
 
-        ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs)
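+        # Merge only the variables belonging to the newly prepared features
+        # (missing_vars maps feature names to variable names)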
) """ request = {"product_type": ["reanalysis"], "download_format": "unarchived"} - request.update(updates) + request.update(updates, data_format=data_format) assert {"year", "month", "variable"}.issubset(request), ( "Need to specify at least 'variable', 'year' and 'month'" ) - logger.debug(f"Requesting {product} with API request: {request}") + logger.debug(f"Requesting {dataset} with API request: {request}") client = cdsapi.Client( info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level ) - result = client.retrieve(product, request) + result = client.retrieve(dataset, request) if lock is None: lock = nullcontext() - suffix = f".{request['data_format']}" # .netcdf or .grib + suffix = f".{data_format}" # .netcdf or .grib with lock: fd, target = mkstemp(suffix=suffix, dir=tmpdir) os.close(fd) @@ -507,7 +514,7 @@ def retrieve_data( result.download(target) # Convert from grib to netcdf locally, same conversion as in CDS backend - if request["data_format"] == "grib": + if data_format == "grib": ds = open_with_grib_conventions(target, chunks=chunks, tmpdir=tmpdir) else: ds = xr.open_dataset(target, chunks=sanitize_chunks(chunks)) @@ -517,6 +524,120 @@ def retrieve_data( return ds +def retrieve_windspeed_average( + cutout=None, + height: int = 100, + first_year: int = 2008, + last_year: int | None = 2017, + data_format: Literal["grib", "netcdf"] = "grib", + **retrieval_params, +): + """ + Retrieve average windspeed from `first_year` to `last_year` + + The default time-period 2008-2017 was chosen to align with the simulation + period of GWA3. + + Parameters + ---------- + cutout : atlite.Cutout or None + Cutout for which to retrieve windspeeds from CDS. If no cutout is + specified the global means are retrieved at native resolution 0.25/0.25. + height : int + Height of windspeeds (ERA5 typically knows about 10m, 100m) + first_year : int, defaults to 2008 + First year to take into account + last_year : int, defaults to 2017 + Last year to take into account (if omitted takes the previous year) + data_format : {"grib", "netcdf"} + Data format to use for retrieving from CDS. 
+    """
+    if last_year is None:
+        last_year = datetime.date.today().year - 1
+
+    retrieval_params = (
+        {
+            "dataset": "reanalysis-era5-single-levels",
+            "product_type": "reanalysis",
+        }
+        | (
+            {
+                "area": _area(cutout.coords),
+                "chunks": cutout.chunks,
+                "grid": f"{cutout.dx}/{cutout.dy}",
+            }
+            if cutout is not None
+            else {}
+        )
+        | retrieval_params
+    )
+
+    def retrieve_chunk(year):
+        ds = retrieve_data(
+            variable=[
+                f"{height}m_u_component_of_wind",
+                f"{height}m_v_component_of_wind",
+            ],
+            year=[year],
+            month=[f"{m:02d}" for m in range(1, 12 + 1)],
+            day=[f"{d:02d}" for d in range(1, 31 + 1)],
+            time=[f"{h:02d}:00" for h in range(0, 23 + 1)],
+            data_format=data_format,
+            **retrieval_params,
+        )
+        ds = _rename_and_clean_coords(ds)
+
+        if cutout is None:
+            # the default longitude range of CDS is [0, 360], while [-180, 180] is standard
+            ds = rotate(ds)
+
+        return (
+            np.sqrt(ds[f"u{height}"] ** 2 + ds[f"v{height}"] ** 2)
+            .mean("time")
+            .assign_attrs(
+                units=ds[f"u{height}"].attrs["units"],
+                long_name=f"{height} metre wind speed as long run average",
+            )
+        )
+
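+    # One request per year; the long-run average is the mean over the
+    # per-year mean windspeeds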
+    years = range(first_year, last_year + 1)
+    return xr.concat(
+        compute(*(delayed(retrieve_chunk)(str(year)) for year in years)),
+        dim=pd.Index(years, name="year"),
+    ).mean("year")
+
+
+def get_data_windspeed_bias_correction(cutout, retrieval_params, creation_parameters):
+    """
+    Get the windspeed bias correction factor.
+    """
+    real_average_path = creation_parameters.get("windspeed_real_average_path")
+    if real_average_path is None:
+        logger.info(
+            "Skipping feature windspeed_bias_correction, since windspeed_real_average_path was not provided.\n"
+            "Download mean wind speeds from the Global Wind Atlas at https://globalwindatlas.info/ and add them\n"
+            'to the cutout with `cutout.prepare(windspeed_real_average_path="path/to/gwa3_250_windspeed_100m.tif")`'
+        )
+        return None
+    height = creation_parameters.get("windspeed_height", 100)
+    data_average = retrieve_windspeed_average(cutout, height, **retrieval_params)
+
+    bias_correction = calculate_windspeed_bias_correction(
+        cutout, real_average_path, height=height, data_average=data_average
+    )
+    return bias_correction.to_dataset(name="wnd_bias_correction")
+
+
 def get_data(
     cutout,
     feature,
...
@@ -552,8 +673,10 @@
         If True, the monthly data requests are posted concurrently. Only has an
         effect if `monthly_requests` is True.
     **creation_parameters :
-        Additional keyword arguments. The only effective argument is 'sanitize'
-        (default True) which sets sanitization of the data on or off.
+        Additional keyword arguments.
+        `sanitize` (default True) sets sanitization of the data on or off.
+        `windspeed_real_average_path` and `windspeed_height` are used by the
+        "windspeed_bias_correction" feature to calculate the correction factor.
 
     Returns
     -------
...
@@ -566,7 +689,8 @@
     sanitize = creation_parameters.get("sanitize", True)
 
     retrieval_params = {
-        "product": "reanalysis-era5-single-levels",
+        "dataset": "reanalysis-era5-single-levels",
+        "product_type": "reanalysis",
         "area": _area(coords),
         "chunks": cutout.chunks,
         "grid": f"{cutout.dx}/{cutout.dy}",
@@ -588,6 +712,12 @@ def retrieve_once(time):
...
     if feature in static_features:
         return retrieve_once(retrieval_times(coords, static=True)).squeeze()
+    elif feature == "windspeed_bias_correction":
+        return func(
+            cutout,
+            retrieval_params=retrieval_params,
+            creation_parameters=creation_parameters,
+        )
 
     time_chunks = retrieval_times(coords, monthly_requests=monthly_requests)
     if concurrent_requests:
diff --git a/atlite/gis.py b/atlite/gis.py
index 69e11678..819a579a 100644
--- a/atlite/gis.py
+++ b/atlite/gis.py
@@ -8,6 +8,7 @@
 import logging
 import multiprocessing as mp
 from collections import OrderedDict
+from functools import singledispatch
 from pathlib import Path
 from warnings import catch_warnings, simplefilter
 
@@ -771,7 +772,69 @@ def maybe_swap_spatial_dims(ds, namex="x", namey="y"):
     return ds.isel(**swaps) if swaps else ds
 
 
-def _as_transform(x, y):
+@singledispatch
+def rotate(x):
+    """
+    Rotate the x coordinate of a Dataset, DataArray or Index from the 0 to 360
+    range to standard coordinates between -180 and 180 degrees.
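+
+    Example (illustrative):
+
+    >>> rotate(pd.Index([0.0, 90.0, 180.0, 270.0])).tolist()
+    [-180.0, -90.0, 0.0, 90.0]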
""" - # Fast lane - to_name = f"wnd{int(to_height):0d}m" - if to_name in ds: - return ds[to_name] - if from_height is None: # Determine closest height to to_name - heights = np.asarray([int(s[3:-1]) for s in ds if re.match(r"wnd\d+m", s)]) + heights = np.asarray( + [int(m.group(1)) for s in ds if (m := re.match(r"wnd(\d+)m", s))] + ) if len(heights) == 0: raise AssertionError("Wind speed is not in dataset") @@ -88,6 +92,10 @@ def extrapolate_wind_speed( from_name = f"wnd{int(from_height):0d}m" + # Fast lane + if from_height == to_height: + return ds[from_name] + if method == "logarithmic": try: roughness = ds["roughness"] @@ -115,7 +123,7 @@ def extrapolate_wind_speed( f"Interpolation method must be 'logarithmic' or 'power', but is: {method}" ) - wnd_spd.attrs.update( + return wnd_spd.assign_attrs( { "long name": ( f"extrapolated {to_height} m wind speed using {method_desc} " @@ -123,6 +131,134 @@ def extrapolate_wind_speed( ), "units": "m s**-1", } - ) + ).rename(f"wnd{to_height}m") + + +def calculate_windspeed_bias_correction( + cutout, + real_average: str | rio.DatasetReader, + height: int = 100, + data_average: xr.DataArray | None = None, + data_crs: CRS | str | int | None = None, +) -> xr.DataArray: + """ + Derive a bias correction factor for windspeed at lra_height + + Regrids the raster dataset in real_average to cutout grid, retrieves the average + windspeed from the first dataset that offers + :py:func:`retrieve_windspeed_average` (only ERA5, currently). + + Parameters + ---------- + cutout : Cutout, optional + Cutout for which to retrieve data_average. Can be omitted if + data_average and data_crs are passed explicitly. + real_average : Path or rasterio.Dataset + Raster dataset with wind speeds to bias correct average wind speeds + height : int + Height in meters at which average windspeeds are provided + data_average : DataArray, optional + Long run average of the windspeed data, if not provided it is retrieved + with the standard settings suitable for bias correction with GWA 3.1. + data_crs : CRS like, optional + CRS that data_average is given in. + + Returns + ------- + DataArray + Ratio between windspeeds in `real_average` to those of average windspeeds in + dataset. + + See Also + -------- + atlite.datasets.era5.retrieve_windspeed_average + """ + + if data_average is None: + from . 
+        real_average, _ = rio.warp.reproject(
+            real_average,
+            np.empty(data_average.shape),
+            dst_crs=CRS(data_crs),
+            dst_transform=transform,
+            dst_nodata=np.nan,
+            resampling=rio.enums.Resampling.average,
+        )
+
+        real_average = xr.DataArray(real_average, data_average.coords)
+    else:
+        raise ValueError(
+            "`real_average` needs to be a path or rasterio object to a raster dataset"
+            f", but is: {real_average}"
+        )
+
+    return (real_average / data_average).assign_attrs(height=height)
+
+
+def apply_windspeed_bias_correction(
+    ds: xr.Dataset, scaling_factor: xr.DataArray
+) -> tuple[xr.Dataset, int]:
+    """
+    Apply a bias correction to the windspeeds in `ds`.
+
+    Uses either an explicitly provided scaling factor or a pre-calculated
+    scaling factor supplied with the cutout.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset containing wind speed time-series (at one or multiple heights)
+        with keys like 'wnd{height:d}m' and potentially a pre-calculated
+        scaling factor at key 'wnd_bias_correction'
+    scaling_factor : DataArray
+        Scaling factor for the wind speed at height `.attrs["height"]`
+
+    Returns
+    -------
+    ds, height
+        The dataset with the scaled wind speed time-series and the height at
+        which it was scaled
+
+    Raises
+    ------
+    ValueError
+        If the scaling factor does not have a 'height' attribute or the
+        dataset does not contain wind speeds at that height.
+    """
+
+    try:
+        height = int(scaling_factor.attrs["height"])
+        name = f"wnd{height:d}m"
+        windspeed = ds[name]
+    except (KeyError, ValueError):
+        raise ValueError(
+            "The provided bias correction needs to have a 'height' attribute "
+            "indicating at which height the wind speeds should be scaled"
+        ) from None
 
-    return wnd_spd.rename(to_name)
+    return ds.assign({name: windspeed * scaling_factor}), height