diff --git a/notebooks/tutorial/kscale_access01.ipynb b/notebooks/tutorial/kscale_access01.ipynb new file mode 100644 index 00000000..0dee4d3b --- /dev/null +++ b/notebooks/tutorial/kscale_access01.ipynb @@ -0,0 +1,2298 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0ac2eeea-4a38-494b-83e8-2b07060113db", + "metadata": {}, + "source": [ + "# This is the v1 accessor to load a netcdf file\n", + "The path to the dataset is hardwired" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5515d404-7c2b-498c-ad22-ebb568f3a05a", + "metadata": {}, + "outputs": [], + "source": [ + "import pyearthtools.data\n", + "\n", + "from pyearthtools.data import Petdt\n", + "from pathlib import Path\n", + "from pyearthtools.data.transforms import Transform, TransformCollection\n", + "import pyearthtools.pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a278cddf-f493-4dfb-9db7-c9af1db8e1cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyearthtools.data.indexes._indexes.ArchiveIndex" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pyearthtools.data.indexes.ArchiveIndex" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "371f5788-0065-4316-a9c7-5a11e6d6d4cc", + "metadata": {}, + "outputs": [], + "source": [ + "class kscale(pyearthtools.data.indexes.ArchiveIndex):\n", + " def __init__(\n", + " self,\n", + " variables: list[str] | str,\n", + " *,\n", + " level_value: int | float | list[int | float] | tuple[list | int, ...] 
| None = None,\n", + " transforms: Transform | TransformCollection | None = None,\n", + " ):\n", + " super().__init__(\n", + " transforms=transforms,\n", + " )\n", + " self.record_initialisation()\n", + "\n", + " # This is where the path is hardwired\n", + " def filesystem(\n", + " self,\n", + " querytime: str | Petdt\n", + " ) -> Path | dict[str, str | Path]:\n", + " return Path(\"/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6ae14046-acd0-4daf-bd73-f98088a9c22e", + "metadata": {}, + "outputs": [], + "source": [ + "accessor=kscale(['ss'])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "71c406c1-d711-46b6-9307-f24342c444ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
kscale\n",
+ "\tInitialisation \n",
+ "\t\t level_value None\n",
+ "\t\t variables ['ss']\n",
+ "\tTransforms \n",
+ "\t\t StandardCoordinateNames {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}<xarray.Dataset> Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " level_height_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " sigma_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + "Attributes:\n", + " Conventions: CF-1.7
Pipeline\n",
+ "\tDescription `pyearthtools.pipeline` Data Pipeline\n",
+ "\n",
+ "\n",
+ "\tInitialisation \n",
+ "\t\t exceptions_to_ignore None\n",
+ "\t\t iterator None\n",
+ "\t\t max_exception_count -1\n",
+ "\t\t name None\n",
+ "\t\t sampler None\n",
+ "\tSteps \n",
+ "\t\t __main__.kscale {'kscale': {'level_value': 'None', 'variables': "['ss']"}}kscale\n",
+ "\tDescription Met Office k-scale data\n",
+ "\t\t range '20030101'\n",
+ "\t\t Documentation 'None'\n",
+ "\n",
+ "\n",
+ "\tInitialisation \n",
+ "\t\t level_value None\n",
+ "\t\t variables ['ss']\n",
+ "\tTransforms \n",
+ "\t\t StandardCoordinateNames {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}<xarray.Dataset> Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " level_height_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " sigma_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + "Attributes:\n", + " Conventions: CF-1.7
netcdf_file\n",
+ "\tDescription Met Office k-scale data\n",
+ "\t\t range '20030101'\n",
+ "\t\t Documentation 'None'\n",
+ "\t\t Vars ['m01s30i001', 'latitude_longitude', 'm01s30i002', 'potential_t_avg_250m', 'specific_humidity', 'upward_air_velocity', 'level_height_bnds', 'sigma_bnds', 'horizontal_wind_divergence', 'total_precip__rain___snow_']\n",
+ "\n",
+ "\n",
+ "\tInitialisation \n",
+ "\t\t filepath '/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'\n",
+ "\t\t level_value None\n",
+ "\t\t variables ['var_I_want']\n",
+ "\tTransforms \n",
+ "\t\t StandardCoordinateNames {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}<xarray.Dataset> Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " level_height_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " sigma_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + "Attributes:\n", + " Conventions: CF-1.7
Pipeline\n",
+ "\tDescription `pyearthtools.pipeline` Data Pipeline\n",
+ "\n",
+ "\n",
+ "\tInitialisation \n",
+ "\t\t exceptions_to_ignore None\n",
+ "\t\t iterator None\n",
+ "\t\t max_exception_count -1\n",
+ "\t\t name None\n",
+ "\t\t sampler None\n",
+ "\tSteps \n",
+ "\t\t __main__.netcdf_file {'netcdf_file': {'filepath': "'/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'", 'level_value': 'None', 'variables': "['var_I_want']"}}netcdf_file\n",
+ "\tDescription netcdf data file (assumed?)\n",
+ "\t\t range DatetimeIndex(['2003-01-01 00:10:00.000000128',\n",
+ " '2003-01-01 00:19:59.999999872',\n",
+ " '2003-01-01 00:30:00',\n",
+ " '2003-01-01 00:40:00.000000128',\n",
+ " '2003-01-01 00:49:59.999999872',\n",
+ " '2003-01-01 01:00:00',\n",
+ " '2003-01-01 01:10:00.000000128',\n",
+ " '2003-01-01 01:19:59.999999872',\n",
+ " '2003-01-01 01:30:00',\n",
+ " '2003-01-01 01:40:00.000000128',\n",
+ " ...\n",
+ " '2003-01-01 22:30:00',\n",
+ " '2003-01-01 22:40:00.000000128',\n",
+ " '2003-01-01 22:49:59.999999872',\n",
+ " '2003-01-01 23:00:00',\n",
+ " '2003-01-01 23:10:00.000000128',\n",
+ " '2003-01-01 23:19:59.999999872',\n",
+ " '2003-01-01 23:30:00',\n",
+ " '2003-01-01 23:40:00.000000128',\n",
+ " '2003-01-01 23:49:59.999999872',\n",
+ " '2003-01-02 00:00:00'],\n",
+ " dtype='datetime64[ns]', name='time', length=144, freq=None)\n",
+ "\t\t Documentation 'None'\n",
+ "\t\t Vars ['m01s30i001', 'latitude_longitude', 'm01s30i002', 'potential_t_avg_250m', 'specific_humidity', 'upward_air_velocity', 'level_height_bnds', 'sigma_bnds', 'horizontal_wind_divergence', 'total_precip__rain___snow_']\n",
+ "\n",
+ "\n",
+ "\tInitialisation \n",
+ "\t\t filepath '/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'\n",
+ "\t\t level_value None\n",
+ "\t\t var_req ['var_I_want']\n",
+ "\tTransforms \n",
+ "\t\t StandardCoordinateNames {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}<xarray.Dataset> Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " level_height_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " sigma_bnds (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n", + "Attributes:\n", + " Conventions: CF-1.7
Pipeline\n",
+ "\tDescription `pyearthtools.pipeline` Data Pipeline\n",
+ "\n",
+ "\n",
+ "\tInitialisation \n",
+ "\t\t exceptions_to_ignore None\n",
+ "\t\t iterator None\n",
+ "\t\t max_exception_count -1\n",
+ "\t\t name None\n",
+ "\t\t sampler None\n",
+ "\tSteps \n",
+ "\t\t __main__.netcdf_file {'netcdf_file': {'filepath': "'/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'", 'level_value': 'None', 'var_req': "['var_I_want']"}}