From 8e7dbc8cd9931a82ce4d0582cc3948dfe007efb2 Mon Sep 17 00:00:00 2001 From: gabriel rooney Date: Thu, 13 Nov 2025 10:01:34 +0000 Subject: [PATCH] created accessor tutorial based on k-scale data --- notebooks/tutorial/kscale_access01.ipynb | 2298 ++++++++++++++++++++ notebooks/tutorial/kscale_access02.ipynb | 1905 +++++++++++++++++ notebooks/tutorial/kscale_access03.ipynb | 2386 +++++++++++++++++++++ notebooks/tutorial/kscale_access04.ipynb | 2456 ++++++++++++++++++++++ 4 files changed, 9045 insertions(+) create mode 100644 notebooks/tutorial/kscale_access01.ipynb create mode 100644 notebooks/tutorial/kscale_access02.ipynb create mode 100644 notebooks/tutorial/kscale_access03.ipynb create mode 100644 notebooks/tutorial/kscale_access04.ipynb diff --git a/notebooks/tutorial/kscale_access01.ipynb b/notebooks/tutorial/kscale_access01.ipynb new file mode 100644 index 00000000..0dee4d3b --- /dev/null +++ b/notebooks/tutorial/kscale_access01.ipynb @@ -0,0 +1,2298 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0ac2eeea-4a38-494b-83e8-2b07060113db", + "metadata": {}, + "source": [ + "# This is the v1 accessor to load a netcdf file\n", + "The path to the dataset is hardwired" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5515d404-7c2b-498c-ad22-ebb568f3a05a", + "metadata": {}, + "outputs": [], + "source": [ + "import pyearthtools.data\n", + "\n", + "from pyearthtools.data import Petdt\n", + "from pathlib import Path\n", + "from pyearthtools.data.transforms import Transform, TransformCollection\n", + "import pyearthtools.pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a278cddf-f493-4dfb-9db7-c9af1db8e1cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyearthtools.data.indexes._indexes.ArchiveIndex" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pyearthtools.data.indexes.ArchiveIndex" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "371f5788-0065-4316-a9c7-5a11e6d6d4cc", + "metadata": {}, + "outputs": [], + "source": [ + "class kscale(pyearthtools.data.indexes.ArchiveIndex):\n", + " def __init__(\n", + " self,\n", + " variables: list[str] | str,\n", + " *,\n", + " level_value: int | float | list[int | float] | tuple[list | int, ...] | None = None,\n", + " transforms: Transform | TransformCollection | None = None,\n", + " ):\n", + " super().__init__(\n", + " transforms=transforms,\n", + " )\n", + " self.record_initialisation()\n", + "\n", + " # This is where the path is hardwired\n", + " def filesystem(\n", + " self,\n", + " querytime: str | Petdt\n", + " ) -> Path | dict[str, str | Path]:\n", + " return Path(\"/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6ae14046-acd0-4daf-bd73-f98088a9c22e", + "metadata": {}, + "outputs": [], + "source": [ + "accessor=kscale(['ss'])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "71c406c1-d711-46b6-9307-f24342c444ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
kscale\n",
+       "\tInitialisation                 \n",
+       "\t\t level_value                    None\n",
+       "\t\t variables                      ['ss']\n",
+       "\tTransforms                     \n",
+       "\t\t StandardCoordinateNames        {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}
" + ], + "text/plain": [ + "kscale\n", + "\tInitialisation \n", + "\t\t level_value None\n", + "\t\t variables ['ss']\n", + "\tTransforms \n", + "\t\t StandardCoordinateNames {'latitude': \"['lat', 'Latitude', 'yt_ocean', 'yt']\", 'longitude': \"['lon', 'Longitude', 'xt_ocean', 'xt']\", 'replacement_dictionary': 'None', 'time': \"['Time']\"}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accessor" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1172f0bb-08e6-4a11-a55e-5d2216a7adf0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/users/train106/PyEarthTools/packages/data/src/pyearthtools/data/indexes/_indexes.py:480: IndexWarning: Could not find time in dataset to select on. Petdt('2020-01-01')\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 409MB\n",
+       "Dimensions:                     (time: 144, latitude: 296, longitude: 343,\n",
+       "                                 bnds: 2)\n",
+       "Coordinates:\n",
+       "  * time                        (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n",
+       "  * latitude                    (latitude) float32 1kB -19.98 -19.94 ... -8.028\n",
+       "  * longitude                   (longitude) float32 1kB 120.0 120.0 ... 133.9\n",
+       "    forecast_period             (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n",
+       "    forecast_reference_time     datetime64[ns] 8B ...\n",
+       "    level_height                float32 4B ...\n",
+       "    model_level_number          int32 4B ...\n",
+       "    sigma                       float32 4B ...\n",
+       "Dimensions without coordinates: bnds\n",
+       "Data variables:\n",
+       "    m01s30i001                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    latitude_longitude          int32 4B ...\n",
+       "    m01s30i002                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    potential_t_avg_250m        (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    specific_humidity           (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    upward_air_velocity         (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    level_height_bnds           (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    sigma_bnds                  (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    horizontal_wind_divergence  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    total_precip__rain___snow_  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.7
" + ], + "text/plain": [ + " Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array\n", + " level_height_bnds (bnds) float32 8B dask.array\n", + " sigma_bnds (bnds) float32 8B dask.array\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array\n", + "Attributes:\n", + " Conventions: CF-1.7" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accessor['20200101']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2a1e0e4f-0958-4bdc-94ec-97e2c5ba78c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Pipeline\n",
+       "\tDescription                    `pyearthtools.pipeline` Data Pipeline\n",
+       "\n",
+       "\n",
+       "\tInitialisation                 \n",
+       "\t\t exceptions_to_ignore           None\n",
+       "\t\t iterator                       None\n",
+       "\t\t max_exception_count            -1\n",
+       "\t\t name                           None\n",
+       "\t\t sampler                        None\n",
+       "\tSteps                          \n",
+       "\t\t __main__.kscale                {'kscale': {'level_value': 'None', 'variables': "['ss']"}}
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pipe1 = pyearthtools.pipeline.Pipeline(\n", + " accessor,\n", + ")\n", + "pipe1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63020c22-4198-4fd3-bf4b-1032d6d2abb7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pet_dev_nb_cpu_jasmin", + "language": "python", + "name": "pet_dev_nb_cpu_jasmin" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorial/kscale_access02.ipynb b/notebooks/tutorial/kscale_access02.ipynb new file mode 100644 index 00000000..52d15dfb --- /dev/null +++ b/notebooks/tutorial/kscale_access02.ipynb @@ -0,0 +1,1905 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0ac2eeea-4a38-494b-83e8-2b07060113db", + "metadata": {}, + "source": [ + "# This is the v2 accessor to load a netcdf file\n", + "\n", + "The path to the dataset is hardwired.\n", + "\n", + "There is a static description." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5515d404-7c2b-498c-ad22-ebb568f3a05a", + "metadata": {}, + "outputs": [], + "source": [ + "import pyearthtools.data\n", + "\n", + "from pyearthtools.data import Petdt\n", + "from pathlib import Path\n", + "from pyearthtools.data.transforms import Transform, TransformCollection\n", + "import pyearthtools.pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a278cddf-f493-4dfb-9db7-c9af1db8e1cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyearthtools.data.indexes._indexes.ArchiveIndex" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pyearthtools.data.indexes.ArchiveIndex" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "371f5788-0065-4316-a9c7-5a11e6d6d4cc", + "metadata": {}, + "outputs": [], + "source": [ + "class kscale(pyearthtools.data.indexes.ArchiveIndex):\n", + " def __init__(\n", + " self,\n", + " variables: list[str] | str,\n", + " *,\n", + " level_value: int | float | list[int | float] | tuple[list | int, ...] 
| None = None,\n", + " transforms: Transform | TransformCollection | None = None,\n", + " ):\n", + " super().__init__(\n", + " transforms=transforms,\n", + " )\n", + " self.record_initialisation()\n", + "\n", + " @property\n", + " def _desc_(self):\n", + " return {\n", + " \"singleline\": \"Met Office k-scale data\",\n", + " \"range\": \"20030101\",\n", + " \"Documentation\": \"None\",\n", + " }\n", + "\n", + "\n", + " # This is where the path is hardwired\n", + " def filesystem(\n", + " self,\n", + " querytime: str | Petdt\n", + " ) -> Path | dict[str, str | Path]:\n", + " return Path(\"/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6ae14046-acd0-4daf-bd73-f98088a9c22e", + "metadata": {}, + "outputs": [], + "source": [ + "accessor=kscale(['ss'])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "71c406c1-d711-46b6-9307-f24342c444ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
kscale\n",
+       "\tDescription                    Met Office k-scale data\n",
+       "\t\t range                          '20030101'\n",
+       "\t\t Documentation                  'None'\n",
+       "\n",
+       "\n",
+       "\tInitialisation                 \n",
+       "\t\t level_value                    None\n",
+       "\t\t variables                      ['ss']\n",
+       "\tTransforms                     \n",
+       "\t\t StandardCoordinateNames        {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}
" + ], + "text/plain": [ + "kscale\n", + "\tDescription Met Office k-scale data\n", + "\t\t range '20030101'\n", + "\t\t Documentation 'None'\n", + "\n", + "\n", + "\tInitialisation \n", + "\t\t level_value None\n", + "\t\t variables ['ss']\n", + "\tTransforms \n", + "\t\t StandardCoordinateNames {'latitude': \"['lat', 'Latitude', 'yt_ocean', 'yt']\", 'longitude': \"['lon', 'Longitude', 'xt_ocean', 'xt']\", 'replacement_dictionary': 'None', 'time': \"['Time']\"}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accessor" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1172f0bb-08e6-4a11-a55e-5d2216a7adf0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/users/train106/PyEarthTools/packages/data/src/pyearthtools/data/indexes/_indexes.py:480: IndexWarning: Could not find time in dataset to select on. Petdt('2020-01-01')\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 409MB\n",
+       "Dimensions:                     (time: 144, latitude: 296, longitude: 343,\n",
+       "                                 bnds: 2)\n",
+       "Coordinates:\n",
+       "  * time                        (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n",
+       "  * latitude                    (latitude) float32 1kB -19.98 -19.94 ... -8.028\n",
+       "  * longitude                   (longitude) float32 1kB 120.0 120.0 ... 133.9\n",
+       "    forecast_period             (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n",
+       "    forecast_reference_time     datetime64[ns] 8B ...\n",
+       "    level_height                float32 4B ...\n",
+       "    model_level_number          int32 4B ...\n",
+       "    sigma                       float32 4B ...\n",
+       "Dimensions without coordinates: bnds\n",
+       "Data variables:\n",
+       "    m01s30i001                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    latitude_longitude          int32 4B ...\n",
+       "    m01s30i002                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    potential_t_avg_250m        (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    specific_humidity           (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    upward_air_velocity         (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    level_height_bnds           (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    sigma_bnds                  (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    horizontal_wind_divergence  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    total_precip__rain___snow_  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.7
" + ], + "text/plain": [ + " Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array\n", + " level_height_bnds (bnds) float32 8B dask.array\n", + " sigma_bnds (bnds) float32 8B dask.array\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array\n", + "Attributes:\n", + " Conventions: CF-1.7" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accessor['20200101']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a1e0e4f-0958-4bdc-94ec-97e2c5ba78c8", + "metadata": {}, + "outputs": [], + "source": [ + "pipe1 = pyearthtools.pipeline.Pipeline(\n", + " accessor,\n", + ")\n", + "pipe1" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pet_dev_nb_cpu_jasmin", + "language": "python", + "name": "pet_dev_nb_cpu_jasmin" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorial/kscale_access03.ipynb b/notebooks/tutorial/kscale_access03.ipynb new file mode 100644 index 00000000..96b287c3 --- /dev/null +++ b/notebooks/tutorial/kscale_access03.ipynb @@ -0,0 +1,2386 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0ac2eeea-4a38-494b-83e8-2b07060113db", + "metadata": {}, + "source": [ + "# This is the v3 accessor to load a netcdf file\n", + "\n", + "The class has got a more generic name instead of \"kscale\".\n", + "\n", + "The first argument is a list of strings, not yet used!\n", + "\n", + "The second argument is the path to the dataset.\n", + "\n", + "Variable names go into the description.\n", + "\n", + "The description about k-scale data is still there, needs removing." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5515d404-7c2b-498c-ad22-ebb568f3a05a", + "metadata": {}, + "outputs": [], + "source": [ + "import pyearthtools.data\n", + "\n", + "from pyearthtools.data import Petdt\n", + "from pathlib import Path\n", + "from pyearthtools.data.transforms import Transform, TransformCollection\n", + "import pyearthtools.pipeline\n", + "\n", + "# import this for use in the variable interrogation\n", + "import xarray as xr\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a278cddf-f493-4dfb-9db7-c9af1db8e1cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyearthtools.data.indexes._indexes.ArchiveIndex" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pyearthtools.data.indexes.ArchiveIndex" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "371f5788-0065-4316-a9c7-5a11e6d6d4cc", + "metadata": {}, + "outputs": [], + "source": [ + "class 
netcdf_file(pyearthtools.data.indexes.ArchiveIndex):\n", + "\n", + "\n", + " def __init__(\n", + " self,\n", + " variables: list[str] | str,\n", + " filepath: str,\n", + " *,\n", + " level_value: int | float | list[int | float] | tuple[list | int, ...] | None = None,\n", + " transforms: Transform | TransformCollection | None = None,\n", + " ):\n", + " super().__init__(\n", + " transforms=transforms,\n", + " )\n", + " self.record_initialisation()\n", + "\n", + " self.filepath=filepath\n", + " self.requested_variables=variables\n", + "\n", + " # object member\n", + " self.get_variable_names_from_netcdf()\n", + "\n", + " @property\n", + " def _desc_(self):\n", + " return {\n", + " \"singleline\": \"Met Office k-scale data\",\n", + " \"range\": \"20030101\",\n", + " \"Documentation\": \"None\",\n", + " \"Vars\": self.variables,\n", + " }\n", + "\n", + " \n", + " # This is where the path was hardwired, but now it's passed in\n", + " def filesystem(\n", + " self,\n", + " querytime: str | Petdt\n", + " ) -> Path | dict[str, str | Path]:\n", + " return Path(self.filepath)\n", + "\n", + " def get_variable_names_from_netcdf(self):\n", + " \"\"\"\n", + " Returns the variable names from a NetCDF file.\n", + " \n", + " Parameters:\n", + " file_path (str): Path to the NetCDF file.\n", + " \n", + " Returns:\n", + " list: A list of variable names in the dataset.\n", + " \"\"\"\n", + " # Open the NetCDF file using xarray\n", + " ds = xr.open_dataset(self.filepath)\n", + " \n", + " # Extract variable names (data variables)\n", + " self.variables = list(ds.data_vars.keys())\n", + " \n", + " # Close the dataset to free resources\n", + " ds.close()\n", + "\n", + " # print( variable_names )\n", + " \n", + " # return variable_names\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6ae14046-acd0-4daf-bd73-f98088a9c22e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_7751/3225001281.py:51: 
FutureWarning: In a future version, xarray will not decode the variable 'forecast_period' into a timedelta64 dtype based on the presence of a timedelta-like 'units' attribute by default. Instead it will rely on the presence of a timedelta64 'dtype' attribute, which is now xarray's default way of encoding timedelta64 values.\n", + "To continue decoding into a timedelta64 dtype, either set `decode_timedelta=True` when opening this dataset, or add the attribute `dtype='timedelta64[ns]'` to this variable on disk.\n", + "To opt-in to future behavior, set `decode_timedelta=False`.\n", + " ds = xr.open_dataset(self.filepath)\n" + ] + } + ], + "source": [ + "accessor=netcdf_file(['var_I_want'],'/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "71c406c1-d711-46b6-9307-f24342c444ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
netcdf_file\n",
+       "\tDescription                    Met Office k-scale data\n",
+       "\t\t range                          '20030101'\n",
+       "\t\t Documentation                  'None'\n",
+       "\t\t Vars                           ['m01s30i001', 'latitude_longitude', 'm01s30i002', 'potential_t_avg_250m', 'specific_humidity', 'upward_air_velocity', 'level_height_bnds', 'sigma_bnds', 'horizontal_wind_divergence', 'total_precip__rain___snow_']\n",
+       "\n",
+       "\n",
+       "\tInitialisation                 \n",
+       "\t\t filepath                       '/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'\n",
+       "\t\t level_value                    None\n",
+       "\t\t variables                      ['var_I_want']\n",
+       "\tTransforms                     \n",
+       "\t\t StandardCoordinateNames        {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}
" + ], + "text/plain": [ + "netcdf_file\n", + "\tDescription Met Office k-scale data\n", + "\t\t range '20030101'\n", + "\t\t Documentation 'None'\n", + "\t\t Vars ['m01s30i001', 'latitude_longitude', 'm01s30i002', 'potential_t_avg_250m', 'specific_humidity', 'upward_air_velocity', 'level_height_bnds', 'sigma_bnds', 'horizontal_wind_divergence', 'total_precip__rain___snow_']\n", + "\n", + "\n", + "\tInitialisation \n", + "\t\t filepath '/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'\n", + "\t\t level_value None\n", + "\t\t variables ['var_I_want']\n", + "\tTransforms \n", + "\t\t StandardCoordinateNames {'latitude': \"['lat', 'Latitude', 'yt_ocean', 'yt']\", 'longitude': \"['lon', 'Longitude', 'xt_ocean', 'xt']\", 'replacement_dictionary': 'None', 'time': \"['Time']\"}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accessor" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1172f0bb-08e6-4a11-a55e-5d2216a7adf0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/users/train106/PyEarthTools/packages/data/src/pyearthtools/data/indexes/_indexes.py:480: IndexWarning: Could not find time in dataset to select on. Petdt('2002-01-01')\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 409MB\n",
+       "Dimensions:                     (time: 144, latitude: 296, longitude: 343,\n",
+       "                                 bnds: 2)\n",
+       "Coordinates:\n",
+       "  * time                        (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n",
+       "  * latitude                    (latitude) float32 1kB -19.98 -19.94 ... -8.028\n",
+       "  * longitude                   (longitude) float32 1kB 120.0 120.0 ... 133.9\n",
+       "    forecast_period             (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n",
+       "    forecast_reference_time     datetime64[ns] 8B ...\n",
+       "    level_height                float32 4B ...\n",
+       "    model_level_number          int32 4B ...\n",
+       "    sigma                       float32 4B ...\n",
+       "Dimensions without coordinates: bnds\n",
+       "Data variables:\n",
+       "    m01s30i001                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    latitude_longitude          int32 4B ...\n",
+       "    m01s30i002                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    potential_t_avg_250m        (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    specific_humidity           (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    upward_air_velocity         (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    level_height_bnds           (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    sigma_bnds                  (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    horizontal_wind_divergence  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    total_precip__rain___snow_  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.7
" + ], + "text/plain": [ + " Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array\n", + " level_height_bnds (bnds) float32 8B dask.array\n", + " sigma_bnds (bnds) float32 8B dask.array\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array\n", + "Attributes:\n", + " Conventions: CF-1.7" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The data isn't for this date but it still works\n", + "accessor['20020101']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2a1e0e4f-0958-4bdc-94ec-97e2c5ba78c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Pipeline\n",
+       "\tDescription                    `pyearthtools.pipeline` Data Pipeline\n",
+       "\n",
+       "\n",
+       "\tInitialisation                 \n",
+       "\t\t exceptions_to_ignore           None\n",
+       "\t\t iterator                       None\n",
+       "\t\t max_exception_count            -1\n",
+       "\t\t name                           None\n",
+       "\t\t sampler                        None\n",
+       "\tSteps                          \n",
+       "\t\t __main__.netcdf_file           {'netcdf_file': {'filepath': "'/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'", 'level_value': 'None', 'variables': "['var_I_want']"}}
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pipe1 = pyearthtools.pipeline.Pipeline(\n", + " accessor,\n", + ")\n", + "pipe1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4bb5643-9dc4-4bd2-8832-b826f12c1ff5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64aa8e5b-641f-4967-a0e5-1adc3dfcc7d8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pet_dev_nb_cpu_jasmin", + "language": "python", + "name": "pet_dev_nb_cpu_jasmin" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorial/kscale_access04.ipynb b/notebooks/tutorial/kscale_access04.ipynb new file mode 100644 index 00000000..c9497d52 --- /dev/null +++ b/notebooks/tutorial/kscale_access04.ipynb @@ -0,0 +1,2456 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0ac2eeea-4a38-494b-83e8-2b07060113db", + "metadata": {}, + "source": [ + "# This is the v4 accessor to load a netcdf file\n", + "\n", + "The class has got a more generic name instead of \"kscale\".\n", + "\n", + "The first argument is a list of strings, not yet used!\n", + "\n", + "The second argument is the path to the dataset.\n", + "\n", + "Variable names and data times go into the description." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5515d404-7c2b-498c-ad22-ebb568f3a05a", + "metadata": {}, + "outputs": [], + "source": [ + "import pyearthtools.data\n", + "\n", + "from pyearthtools.data import Petdt\n", + "from pathlib import Path\n", + "from pyearthtools.data.transforms import Transform, TransformCollection\n", + "import pyearthtools.pipeline\n", + "\n", + "# import this for use in the variable interrogation\n", + "import xarray as xr\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a278cddf-f493-4dfb-9db7-c9af1db8e1cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyearthtools.data.indexes._indexes.ArchiveIndex" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pyearthtools.data.indexes.ArchiveIndex" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "371f5788-0065-4316-a9c7-5a11e6d6d4cc", + "metadata": {}, + "outputs": [], + "source": [ + "class netcdf_file(pyearthtools.data.indexes.ArchiveIndex):\n", + "\n", + "\n", + " def __init__(\n", + " self,\n", + " var_req: list[str] | str,\n", + " filepath: str,\n", + " *,\n", + " level_value: int | float | list[int | float] | tuple[list | int, ...] 
| None = None,\n", + " transforms: Transform | TransformCollection | None = None,\n", + " ):\n", + " super().__init__(\n", + " transforms=transforms,\n", + " )\n", + " self.record_initialisation()\n", + "\n", + " self.filepath=filepath\n", + " self.requested_variables=var_req\n", + "\n", + " # object member\n", + " self.get_variable_data_from_netcdf()\n", + "\n", + " @property\n", + " def _desc_(self):\n", + " return {\n", + " \"singleline\": \"netcdf data file (assumed?)\",\n", + " \"range\": self.times,\n", + " \"Documentation\": \"None\",\n", + " \"Vars\": self.variables,\n", + " }\n", + "\n", + " \n", + " # This is where the path was hardwired, but now it's passed in\n", + " def filesystem(\n", + " self,\n", + " querytime: str | Petdt\n", + " ) -> Path | dict[str, str | Path]:\n", + " return Path(self.filepath)\n", + "\n", + " def get_variable_data_from_netcdf(self):\n", + " \"\"\"\n", + " Returns the variable names from a NetCDF file.\n", + " \n", + " Parameters:\n", + " file_path (str): Path to the NetCDF file.\n", + " \n", + " Returns:\n", + " list: A list of variable names in the dataset.\n", + " \"\"\"\n", + " # Open the NetCDF file using xarray\n", + " ds = xr.open_dataset(self.filepath)\n", + " \n", + " # Extract variable names (data variables)\n", + " self.variables = list(ds.data_vars.keys())\n", + "\n", + " # get the data times\n", + " # there are various ways...\n", + " #\n", + " # as a pandas.DatetimeIndex\n", + " self.times = ds.indexes['time']\n", + " #\n", + " # as a NumPy array of datetime objects\n", + " ### self.times = ds['time'].values\n", + " #\n", + " # otherwise\n", + " ### self.times = ds.time.values\n", + " \n", + " # Close the dataset to free resources\n", + " ds.close()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6ae14046-acd0-4daf-bd73-f98088a9c22e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_9700/3147708279.py:51: 
FutureWarning: In a future version, xarray will not decode the variable 'forecast_period' into a timedelta64 dtype based on the presence of a timedelta-like 'units' attribute by default. Instead it will rely on the presence of a timedelta64 'dtype' attribute, which is now xarray's default way of encoding timedelta64 values.\n", + "To continue decoding into a timedelta64 dtype, either set `decode_timedelta=True` when opening this dataset, or add the attribute `dtype='timedelta64[ns]'` to this variable on disk.\n", + "To opt-in to future behavior, set `decode_timedelta=False`.\n", + " ds = xr.open_dataset(self.filepath)\n" + ] + } + ], + "source": [ + "accessor=netcdf_file(['var_I_want'],'/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "71c406c1-d711-46b6-9307-f24342c444ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
netcdf_file\n",
+       "\tDescription                    netcdf data file (assumed?)\n",
+       "\t\t range                          DatetimeIndex(['2003-01-01 00:10:00.000000128',\n",
+       "               '2003-01-01 00:19:59.999999872',\n",
+       "                         '2003-01-01 00:30:00',\n",
+       "               '2003-01-01 00:40:00.000000128',\n",
+       "               '2003-01-01 00:49:59.999999872',\n",
+       "                         '2003-01-01 01:00:00',\n",
+       "               '2003-01-01 01:10:00.000000128',\n",
+       "               '2003-01-01 01:19:59.999999872',\n",
+       "                         '2003-01-01 01:30:00',\n",
+       "               '2003-01-01 01:40:00.000000128',\n",
+       "               ...\n",
+       "                         '2003-01-01 22:30:00',\n",
+       "               '2003-01-01 22:40:00.000000128',\n",
+       "               '2003-01-01 22:49:59.999999872',\n",
+       "                         '2003-01-01 23:00:00',\n",
+       "               '2003-01-01 23:10:00.000000128',\n",
+       "               '2003-01-01 23:19:59.999999872',\n",
+       "                         '2003-01-01 23:30:00',\n",
+       "               '2003-01-01 23:40:00.000000128',\n",
+       "               '2003-01-01 23:49:59.999999872',\n",
+       "                         '2003-01-02 00:00:00'],\n",
+       "              dtype='datetime64[ns]', name='time', length=144, freq=None)\n",
+       "\t\t Documentation                  'None'\n",
+       "\t\t Vars                           ['m01s30i001', 'latitude_longitude', 'm01s30i002', 'potential_t_avg_250m', 'specific_humidity', 'upward_air_velocity', 'level_height_bnds', 'sigma_bnds', 'horizontal_wind_divergence', 'total_precip__rain___snow_']\n",
+       "\n",
+       "\n",
+       "\tInitialisation                 \n",
+       "\t\t filepath                       '/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'\n",
+       "\t\t level_value                    None\n",
+       "\t\t var_req                        ['var_I_want']\n",
+       "\tTransforms                     \n",
+       "\t\t StandardCoordinateNames        {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}
" + ], + "text/plain": [ + "netcdf_file\n", + "\tDescription netcdf data file (assumed?)\n", + "\t\t range DatetimeIndex(['2003-01-01 00:10:00.000000128',\n", + " '2003-01-01 00:19:59.999999872',\n", + " '2003-01-01 00:30:00',\n", + " '2003-01-01 00:40:00.000000128',\n", + " '2003-01-01 00:49:59.999999872',\n", + " '2003-01-01 01:00:00',\n", + " '2003-01-01 01:10:00.000000128',\n", + " '2003-01-01 01:19:59.999999872',\n", + " '2003-01-01 01:30:00',\n", + " '2003-01-01 01:40:00.000000128',\n", + " ...\n", + " '2003-01-01 22:30:00',\n", + " '2003-01-01 22:40:00.000000128',\n", + " '2003-01-01 22:49:59.999999872',\n", + " '2003-01-01 23:00:00',\n", + " '2003-01-01 23:10:00.000000128',\n", + " '2003-01-01 23:19:59.999999872',\n", + " '2003-01-01 23:30:00',\n", + " '2003-01-01 23:40:00.000000128',\n", + " '2003-01-01 23:49:59.999999872',\n", + " '2003-01-02 00:00:00'],\n", + " dtype='datetime64[ns]', name='time', length=144, freq=None)\n", + "\t\t Documentation 'None'\n", + "\t\t Vars ['m01s30i001', 'latitude_longitude', 'm01s30i002', 'potential_t_avg_250m', 'specific_humidity', 'upward_air_velocity', 'level_height_bnds', 'sigma_bnds', 'horizontal_wind_divergence', 'total_precip__rain___snow_']\n", + "\n", + "\n", + "\tInitialisation \n", + "\t\t filepath '/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'\n", + "\t\t level_value None\n", + "\t\t var_req ['var_I_want']\n", + "\tTransforms \n", + "\t\t StandardCoordinateNames {'latitude': \"['lat', 'Latitude', 'yt_ocean', 'yt']\", 'longitude': \"['lon', 'Longitude', 'xt_ocean', 'xt']\", 'replacement_dictionary': 'None', 'time': \"['Time']\"}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accessor" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1172f0bb-08e6-4a11-a55e-5d2216a7adf0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/home/users/train106/PyEarthTools/packages/data/src/pyearthtools/data/indexes/_indexes.py:480: IndexWarning: Could not find time in dataset to select on. Petdt('2020-01-01')\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 409MB\n",
+       "Dimensions:                     (time: 144, latitude: 296, longitude: 343,\n",
+       "                                 bnds: 2)\n",
+       "Coordinates:\n",
+       "  * time                        (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n",
+       "  * latitude                    (latitude) float32 1kB -19.98 -19.94 ... -8.028\n",
+       "  * longitude                   (longitude) float32 1kB 120.0 120.0 ... 133.9\n",
+       "    forecast_period             (time) timedelta64[ns] 1kB dask.array<chunksize=(144,), meta=np.ndarray>\n",
+       "    forecast_reference_time     datetime64[ns] 8B ...\n",
+       "    level_height                float32 4B ...\n",
+       "    model_level_number          int32 4B ...\n",
+       "    sigma                       float32 4B ...\n",
+       "Dimensions without coordinates: bnds\n",
+       "Data variables:\n",
+       "    m01s30i001                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    latitude_longitude          int32 4B ...\n",
+       "    m01s30i002                  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    potential_t_avg_250m        (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    specific_humidity           (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    upward_air_velocity         (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    level_height_bnds           (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    sigma_bnds                  (bnds) float32 8B dask.array<chunksize=(2,), meta=np.ndarray>\n",
+       "    horizontal_wind_divergence  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "    total_precip__rain___snow_  (time, latitude, longitude) float32 58MB dask.array<chunksize=(144, 296, 343), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.7
" + ], + "text/plain": [ + " Size: 409MB\n", + "Dimensions: (time: 144, latitude: 296, longitude: 343,\n", + " bnds: 2)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 1kB 2003-01-01T00:10:00...\n", + " * latitude (latitude) float32 1kB -19.98 -19.94 ... -8.028\n", + " * longitude (longitude) float32 1kB 120.0 120.0 ... 133.9\n", + " forecast_period (time) timedelta64[ns] 1kB dask.array\n", + " forecast_reference_time datetime64[ns] 8B ...\n", + " level_height float32 4B ...\n", + " model_level_number int32 4B ...\n", + " sigma float32 4B ...\n", + "Dimensions without coordinates: bnds\n", + "Data variables:\n", + " m01s30i001 (time, latitude, longitude) float32 58MB dask.array\n", + " latitude_longitude int32 4B ...\n", + " m01s30i002 (time, latitude, longitude) float32 58MB dask.array\n", + " potential_t_avg_250m (time, latitude, longitude) float32 58MB dask.array\n", + " specific_humidity (time, latitude, longitude) float32 58MB dask.array\n", + " upward_air_velocity (time, latitude, longitude) float32 58MB dask.array\n", + " level_height_bnds (bnds) float32 8B dask.array\n", + " sigma_bnds (bnds) float32 8B dask.array\n", + " horizontal_wind_divergence (time, latitude, longitude) float32 58MB dask.array\n", + " total_precip__rain___snow_ (time, latitude, longitude) float32 58MB dask.array\n", + "Attributes:\n", + " Conventions: CF-1.7" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this is a bogus date but it's not used so just gives a warning\n", + "accessor['20200101']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "2a1e0e4f-0958-4bdc-94ec-97e2c5ba78c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Pipeline\n",
+       "\tDescription                    `pyearthtools.pipeline` Data Pipeline\n",
+       "\n",
+       "\n",
+       "\tInitialisation                 \n",
+       "\t\t exceptions_to_ignore           None\n",
+       "\t\t iterator                       None\n",
+       "\t\t max_exception_count            -1\n",
+       "\t\t name                           None\n",
+       "\t\t sampler                        None\n",
+       "\tSteps                          \n",
+       "\t\t __main__.netcdf_file           {'netcdf_file': {'filepath': "'/gws/ssde/j25a/mmh_storage/train106/wr3_20030101.nc'", 'level_value': 'None', 'var_req': "['var_I_want']"}}
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pipe1 = pyearthtools.pipeline.Pipeline(\n", + " accessor,\n", + ")\n", + "pipe1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4bb5643-9dc4-4bd2-8832-b826f12c1ff5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64aa8e5b-641f-4967-a0e5-1adc3dfcc7d8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pet_dev_nb_cpu_jasmin", + "language": "python", + "name": "pet_dev_nb_cpu_jasmin" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}