From 09584123f36d22b69e4410125e7cbc4f9c3db3cb Mon Sep 17 00:00:00 2001
From: Christian Dunkerly <23328924+cwdunkerly@users.noreply.github.com>
Date: Sat, 21 Sep 2024 17:56:15 -0700
Subject: [PATCH 1/3] Changed ee_download to use the high-volume endpoint

ee_download.py now requests the timeseries data directly instead of going
through cloud storage. Additionally, downloading is now parallelized in a
thread pool, which scales to use half of the user's CPU cores.

Removed all references to GCS.
---
 demo.py                   |  54 ++++------
 gridwxcomp/ee_download.py | 208 +++++++++++++++++---------------------
 2 files changed, 114 insertions(+), 148 deletions(-)

diff --git a/demo.py b/demo.py
index 1a031fe..e220ff5 100644
--- a/demo.py
+++ b/demo.py
@@ -1,6 +1,5 @@
 import ee
-import pyproj
-from gridwxcomp.prep_metadata import prep_metadata, get_subgrid_bounds
+from gridwxcomp.prep_metadata import prep_metadata
 from gridwxcomp.ee_download import download_grid_data
 from gridwxcomp.plot import daily_comparison, monthly_comparison, station_bar_plot
 from gridwxcomp.calc_bias_ratios import calc_bias_ratios
@@ -8,19 +7,17 @@
 from gridwxcomp.util import reproject_crs_for_bounds
 
-# initialize earth engine
-ee.Initialize()
 
 # name of the dataset comparison is being made with
-gridded_dataset_name = 'conus404'
-# Path to station metadata file with lat/long coords
-station_meta_path = './gridwxcomp/example_data/Station_Data.txt'
+gridded_dataset_name = 'era5land'
 # local path for config file
-config_path = f'./gridwxcomp/example_data/gridwxcomp_config_{gridded_dataset_name}.ini'
+config_path = f'gridwxcomp_config_{gridded_dataset_name}.ini'
+
+# initialize earth engine
+ee.Authenticate()
+ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')
 
-# name of bucket data will be exported to once the earth engine calls complete
-export_bucket = 'openet'
-# path in bucket to export to
-export_path = f'bias_correction_gridwxcomp_testing/gridwxcomp_{gridded_dataset_name}/'
+# Path to station metadata file with lat/long coords
+station_meta_path = 'openet_station_data/conus_comparison_station_list.csv'
 
 # local path for prep_metadata output
 gridwxcomp_input = f'{gridded_dataset_name}_gridwxcomp_metadata.csv'
@@ -79,33 +76,27 @@
     'crs_id': 'ESRI:102004'}
 }
 
-# User has option to automatically pull bounds, aee TODO in function over assumptions
+# User has option to automatically pull bounds, see TODO in function over assumptions
 # projection_dict['wgs84']['bounds'] = get_subgrid_bounds(gridwxcomp_input, config_path, buffer=25)
 projection_dict['lcc']['bounds'] = reproject_crs_for_bounds(
     projection_dict['wgs84']['bounds'], projection_dict['lcc']['resolution'],
     projection_dict['wgs84']['crs_id'], projection_dict['lcc']['crs_id'], 0)
 
-
 '''
     download_grid_data
-    The purpose of this module is to make calls to the earth engine API to export gridded timeseries at the station
-    points contained within the station metadata file generated by prep_metadata. Exported data will be saved to
-    a cloud storage bucket that the user specifies.
+    The purpose of this module is to make calls to the earth engine API to
+    export timeseries for the gridded dataset at the station locations
+    contained within the station metadata file generated by prep_metadata.
+
+    The data is saved locally as it is downloaded, and the metadata file
+    generated by prep_metadata is amended with paths to the downloaded files.
 
-    The user will need to manually download the data via gsutil before proceeding further. The module amends the
-    station metadata file to have paths for the gridded dataset as they would exist locally once the user has
-    downloaded them.
-
-    Example: the example run is with the dataset conus404, the gridded path will be filled in such that if you
-        download the 'conus404' folder from the bucket to the local directory of trial_runs.py it will work without
-        further modification.
 '''
 download_grid_data(
     gridwxcomp_input,
     config_path,
-    export_bucket=export_bucket,
-    export_path=export_path,
+    local_folder=None,
    force_download=False)
 
 '''
@@ -139,12 +130,9 @@
     config_path,
     dataset_name=gridded_dataset_name)
 
-for var in [
-        'tmax',
-        'tmin',
-        'eto']:  # Iterate over vars in list. Valid entries found in calc_bias_ratios.py VAR_LIST
-    # path to bias ratios output file
-    ratio_filepath = f'{output_dir}/{var}_summary_comp_1980_2020.csv'
+for var in ['etr', 'eto']:  # Iterate over vars in list. Valid entries found in calc_bias_ratios.py VAR_LIST
+    ratio_filepath = f'{output_dir}/gridded_{var}_summary_comp_1980_2020.csv'  # path to bias ratios output file
+
     interpolation_out_path = (f'{var}_invdistnn_p{params["power"]}_'  # directory for interpolation outputs
                               f's{params["smoothing"]}_maxpoints{params["max_points"]}_radius{params["radius"]}')
 
@@ -173,7 +161,7 @@
         comparison_var=var,
         grid_id=None,
         day_limit=10,
-        years='1980-2020',
+        years='all',
         comp=True)
 
 '''
diff --git a/gridwxcomp/ee_download.py b/gridwxcomp/ee_download.py
index a337608..10df4fb 100644
--- a/gridwxcomp/ee_download.py
+++ b/gridwxcomp/ee_download.py
@@ -1,16 +1,22 @@
 # -*- coding: utf-8 -*-
 """
-This module has tools to download timeseries climate data from gridded climate data collections that are hosted on the Google Earth Engine. It reads the formatted file that was prepared using the :mod:`gridwxcomp.prep_metadata` module and uses the coordinate information there along with the variable names specified in the configuartion .INI file to know which data to download and for which geographic locations which are paired with the station locations.
+This module has tools to download timeseries climate data from gridded climate
+data collections that are hosted on Google's Earth Engine. It reads the
+formatted file that was prepared using the :mod:`gridwxcomp.prep_metadata`
+module and uses the coordinate information there along with the variable names
+specified in the configuration .INI file to know which data to download and
+for the geographic locations that are paired with the station locations.
 """
-import datetime
+
 import ee
 import os
 import pandas as pd
 import re
-
-from google.cloud import storage
-from pathlib import Path
+import time
 from gridwxcomp.util import read_config
+from pathlib import Path
+from multiprocessing.pool import ThreadPool as Pool
+
 
 def _get_collection_date_range(path):
     """
@@ -35,82 +41,81 @@ def _get_collection_date_range(path):
         end_date = end_date[:7] + '-' + end_date[7:]
     return start_date, end_date
 
-def _download_point_data(
-        start_date,
-        end_date,
-        lat,
-        lon,
-        station_name,
-        dataset_path,
-        dataset_name,
-        bucket,
-        path):
-
-    start_name = start_date.replace('-', '')
-    end_name = (
-        datetime.datetime.strptime(
-            end_date,
-            "%Y-%m-%d") -
-        datetime.timedelta(
-            days=1)).strftime('%Y%m%d')
-    # Create point to reduce over
-    point = ee.Geometry.Point([lon, lat])
+def _download_point_data(param_dict):
+    """
+    Makes a reduceRegion call to Earth Engine to extract the timeseries for a station.
+    Data is obtained via a getInfo call and then saved locally.
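+    Because getInfo returns each station's payload synchronously and the
+    payloads are small, the requests fit the access pattern the
+    high-volume endpoint is designed for.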
+ Called from :func:`download_grid_data` using a ThreadPool. + Arguments: + param_dict (dict): dictionary of parameters for reduceRegion call. - ic = ee.ImageCollection(dataset_path).filterDate(start_date, end_date) + Returns: + None - station_string = station_name - station_name = ee.String(station_name) - file_description = '{}_{}_{}_{}'.format( - dataset_name, station_string, start_name, end_name) - complete_path = path + file_description + '_all_vars' + Note: You must authenticate with Google Earth Engine before using + this function. + """ + # todo: change force_download (bool) to download_method (string) + # and provide option for appending latest data + # Don't re-download file unless force_download is True + if (os.path.exists(param_dict['GRID_FILE_PATH']) and + not param_dict['FORCE_DOWNLOAD']): + print(f'{param_dict["GRID_FILE_PATH"]} already exists,' + f' skipping download.\n') + return + + # Time download process + start_time = time.time() + + # get image properties + ic = (ee.ImageCollection(param_dict['DATASET_PATH']). + filterDate(param_dict['START_DATE'], param_dict['END_DATE'])) + bands = ic.first().bandNames().getInfo() + scale = ic.first().projection().nominalScale().getInfo() / 10 # in meters + + # Create point to reduce over + point = ee.Geometry.Point([param_dict['STATION_LON_WGS84'], + param_dict['STATION_LAT_WGS84']]) def _reduce_point_img(img): date_str = img.date() date_mean = date_str.format("YYYYMMdd") - # TODO add nominal scale call and set reducing scale to it reduce_mean = img.reduceRegion(geometry=point, reducer=ee.Reducer.mean(), - crs='EPSG:4326', - scale=1000) + crs='EPSG:4326', scale=scale) return ee.Feature(None, reduce_mean).set( - {"date": date_mean, 'station_name': station_name}) + {"date": date_mean, 'station_name': param_dict['STATION_ID']}) - output = ee.FeatureCollection(ic.map(_reduce_point_img)) + # function to create output stats list + def _summary_feature_col(ftr): + output_list = [ftr.get('date'), ftr.get('station_name')] + for band in bands: + output_list.append(ftr.get(band)) - # Export Summary Table - task = ee.batch.Export.table.toCloudStorage( - collection=ee.FeatureCollection(output), - description=file_description, - bucket=bucket, fileNamePrefix=complete_path, fileFormat='CSV', - ) + return ftr.set({'output': output_list}) - task.start() + output_stats = (ee.FeatureCollection(ic.map(_reduce_point_img)) + .map(_summary_feature_col)) + output_timeseries = output_stats.aggregate_array('output').getInfo() - print( - 'Request submitted for {}_{}_{}_{}'.format( - dataset_name, - station_string, - start_name, - end_name)) - print(f'Waiting for task (id: {task.id}) to complete ...') - while task.active(): - continue - - -def download_grid_data( - metadata_path, - config_path, - export_bucket, - export_path, - local_folder=None, - force_download=False): + column_names = ['date', 'station_name'] + bands + output_df = pd.DataFrame(data=output_timeseries, columns=column_names) + output_df.to_csv(param_dict['GRID_FILE_PATH'], index=False) + + execution_minutes = (time.time() - start_time) / 60 + print(f'\n{param_dict["GRID_FILE_PATH"]} downloaded in ' + f'{execution_minutes:.2f} minutes.') + + +def download_grid_data(metadata_path, config_path, + local_folder=None, force_download=False): """ Takes in the metadata file generated by :func:`gridwxcomp.prep_metadata` and downloads the corresponding point data for all stations within. 
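+    Station timeseries are requested concurrently in a thread pool sized
+    to half of the available CPU cores.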
This - function requires the dataset be accesible in the user's Google Earth Engine - account, and the Google data collection name and path should be specified + function requires the dataset be accessible in the user's Google Earth Engine + account, and the image collection name and path should be specified in the configuration .INI file (i.e., in the ``config_path`` file). The metadata file will be updated for the path the gridded data files @@ -120,20 +125,16 @@ def download_grid_data( metadata_path (str): path to the metadata path generated by :func:`gridwxcomp.prep_metadata` config_path (str): path to config file containing catalog info - export_bucket (str): name of the Google cloud bucket for export - export_path (str): path within bucket where data is going to be saved local_folder (str): folder to download point data to - force_download (bool): will re-download all data even if bucket already - exists + force_download (bool): will re-download all data even if local file + already exists Returns: None Note: You must authenticate with Google Earth Engine before using this function. - """ - config = read_config(config_path) # Read config # Pull relevant metadata from dictionary dataset = config['collection_info']['name'] @@ -149,66 +150,43 @@ def download_grid_data( if gridded_dataset_date_end == '': gridded_dataset_date_end = collection_end_date - gridded_dataset_end_name = ( - datetime.datetime.strptime( - gridded_dataset_date_end, - "%Y-%m-%d") - - datetime.timedelta( - days=1)).strftime('%Y%m%d') - # Open gridwxcomp station metadata file metadata_df = pd.read_csv(metadata_path) metadata_df['GRID_FILE_PATH'] = '' - # Connect to google bucket to check which files have been downloaded - storage_client = storage.Client() - storage_bucket = storage_client.bucket(export_bucket) - - # Iterate over metadata_df + # Iterate over metadata_df to fill in other columns for index, row in metadata_df.iterrows(): - print(f'Extracting {dataset} data for: {row["STATION_ID"]}') formatted_station_id = re.sub( r'\W+', '', row["STATION_ID"].replace(' ', '_')).lower() - print(f'Formatted Station_ID: {formatted_station_id}') if local_folder: Path(f'{local_folder}/{dataset}').mkdir(parents=True, exist_ok=True) local_path = (f'{local_folder}/{dataset}/{dataset}_' - f'{formatted_station_id}_' - f'{gridded_dataset_date_start.replace("-", "")}_' - f'{gridded_dataset_end_name}_all_vars.csv') + f'{formatted_station_id}.csv') else: Path(f'{dataset}').mkdir(parents=True, exist_ok=True) - local_path = f'{dataset}/{dataset}_{formatted_station_id}_' \ - f'{gridded_dataset_date_start.replace("-", "")}_{gridded_dataset_end_name}_all_vars.csv' - - cloud_path = f'{export_path}{dataset}_{formatted_station_id}_' \ - f'{gridded_dataset_date_start.replace("-", "")}_{gridded_dataset_end_name}_all_vars.csv' - - metadata_df.loc[index, 'GRID_FILE_PATH'] = Path(local_path).absolute() - - # Check if file exists on the cloud and skip unless force_download is - # true - gcloud_blob = storage.Blob(bucket=storage_bucket, name=cloud_path) - cloud_file_exists = gcloud_blob.exists(storage_client) - if cloud_file_exists and not force_download: - print( - f'gs://{export_bucket}/{cloud_path} already exists, skipping.') - else: - _download_point_data( - start_date=gridded_dataset_date_start, - end_date=gridded_dataset_date_end, - lat=row['STATION_LAT_WGS84'], - lon=row['STATION_LON_WGS84'], - station_name=str(formatted_station_id), - dataset_path=gridded_dataset_path, - dataset_name=dataset, - bucket=export_bucket, - path=export_path) - if 
not os.path.exists(local_path): - print('Downloading', local_path, '...') - gcloud_blob.download_to_filename(local_path) + local_path = f'./{dataset}/{dataset}_{formatted_station_id}.csv' + + absolute_file_path = Path(local_path).absolute() + metadata_df.loc[index, 'GRID_FILE_PATH'] = absolute_file_path + + # restructure metadata to make iterating simpler + iterable_df = metadata_df[ + ['STATION_ID', 'STATION_LAT_WGS84', + 'STATION_LON_WGS84', 'GRID_FILE_PATH']].copy(deep=True) + iterable_df['START_DATE'] = gridded_dataset_date_start + iterable_df['END_DATE'] = gridded_dataset_date_end + iterable_df['DATASET_PATH'] = gridded_dataset_path + iterable_df['FORCE_DOWNLOAD'] = force_download + iterable_list = iterable_df.to_dict('records') + + # open multiprocessing pool + thread_count = int(os.cpu_count() / 2) + pool = Pool(thread_count) + pool.map(_download_point_data, iterable_list) + pool.close() + pool.join() metadata_df.to_csv(metadata_path, index=False) print( From c8863429744a722d79185f090ee676d8ddf3501c Mon Sep 17 00:00:00 2001 From: Christian Dunkerly <23328924+cwdunkerly@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:31:52 -0800 Subject: [PATCH 2/3] re-added precipitation after it was removed on previous build --- demo.py | 15 ++++++------ gridwxcomp/calc_bias_ratios.py | 2 +- gridwxcomp/ee_download.py | 12 ++++++---- .../example_data/gridwxcomp_config_agera5.ini | 6 +++++ .../gridwxcomp_config_conus404.ini | 6 +++++ gridwxcomp/plot.py | 11 +++++---- gridwxcomp/util.py | 24 +++++++++++++++---- 7 files changed, 54 insertions(+), 22 deletions(-) diff --git a/demo.py b/demo.py index e220ff5..fd1f677 100644 --- a/demo.py +++ b/demo.py @@ -8,17 +8,16 @@ # name of the dataset comparison is being made with -gridded_dataset_name = 'era5land' +gridded_dataset_name = 'conus404' # local path for config file -config_path = f'gridwxcomp_config_{gridded_dataset_name}.ini' +config_path = f'gridwxcomp/example_data/gridwxcomp_config_{gridded_dataset_name}.ini' +# Path to station metadata file with lat/long coords +station_meta_path = 'gridwxcomp/example_data/Station_Data.txt' # initialize earth engine ee.Authenticate() ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com') -# Path to station metadata file with lat/long coords -station_meta_path = 'openet_station_data/conus_comparison_station_list.csv' - # local path for prep_metadata output gridwxcomp_input = f'{gridded_dataset_name}_gridwxcomp_metadata.csv' # Directory that bias ratio/interpolation outputs will be saved to @@ -130,8 +129,8 @@ config_path, dataset_name=gridded_dataset_name) -for var in ['etr', 'eto']: # Iterate over vars in list. Valid entries found in calc_bias_ratios.py VAR_LIST - ratio_filepath = f'{output_dir}/gridded_{var}_summary_comp_1980_2020.csv' # path to bias ratios output file +for var in ['prcp', 'etr', 'eto']: # Iterate over vars in list. Valid entries found in calc_bias_ratios.py VAR_LIST + ratio_filepath = f'{output_dir}/{var}_summary_comp_all_yrs.csv' # path to bias ratios output file interpolation_out_path = (f'{var}_invdistnn_p{params["power"]}_' # directory for interpolation outputs f's{params["smoothing"]}_maxpoints{params["max_points"]}_radius{params["radius"]}') @@ -169,7 +168,7 @@ Generates boxplots of the bias ratios to visualize overall performance. Requires the outputs of calc_bias_ratios as an input. 
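+    The bar_plot_layer argument selects the summary column of the ratio
+    file to plot; 'grow_mean' is the growing season mean bias ratio.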
''' - station_bar_plot(ratio_filepath, bar_plot_layer='growseason_mean') + station_bar_plot(ratio_filepath, bar_plot_layer='grow_mean') ''' make grid diff --git a/gridwxcomp/calc_bias_ratios.py b/gridwxcomp/calc_bias_ratios.py index 6fcfc88..d51a06d 100644 --- a/gridwxcomp/calc_bias_ratios.py +++ b/gridwxcomp/calc_bias_ratios.py @@ -23,7 +23,7 @@ from .util import parse_yr_filter, read_config, read_data, convert_units -VAR_LIST = ['tmax', 'tmin', 'tdew', 'rs', 'wind', 'ea', 'rhmax', 'rhmin', 'rhavg', 'eto', 'etr'] +VAR_LIST = ['tmax', 'tmin', 'tdew', 'rs', 'wind', 'ea', 'rhmax', 'rhmin', 'rhavg', 'eto', 'etr', 'prcp'] GROW_THRESH = 65 SUM_THRESH = 35 ANN_THRESH = 125 diff --git a/gridwxcomp/ee_download.py b/gridwxcomp/ee_download.py index 10df4fb..3d108bd 100644 --- a/gridwxcomp/ee_download.py +++ b/gridwxcomp/ee_download.py @@ -9,11 +9,12 @@ """ import ee +import googleapiclient import os import pandas as pd import re import time -from gridwxcomp.util import read_config +from gridwxcomp.util import read_config, affine_transform from pathlib import Path from multiprocessing.pool import ThreadPool as Pool @@ -58,6 +59,7 @@ def _download_point_data(param_dict): """ # todo: change force_download (bool) to download_method (string) # and provide option for appending latest data + # Don't re-download file unless force_download is True if (os.path.exists(param_dict['GRID_FILE_PATH']) and not param_dict['FORCE_DOWNLOAD']): @@ -72,7 +74,8 @@ def _download_point_data(param_dict): ic = (ee.ImageCollection(param_dict['DATASET_PATH']). filterDate(param_dict['START_DATE'], param_dict['END_DATE'])) bands = ic.first().bandNames().getInfo() - scale = ic.first().projection().nominalScale().getInfo() / 10 # in meters + projection = ic.first().projection() + transform = affine_transform(ic.first()) # Create point to reduce over point = ee.Geometry.Point([param_dict['STATION_LON_WGS84'], @@ -81,9 +84,10 @@ def _download_point_data(param_dict): def _reduce_point_img(img): date_str = img.date() date_mean = date_str.format("YYYYMMdd") + reduce_mean = img.reduceRegion(geometry=point, reducer=ee.Reducer.mean(), - crs='EPSG:4326', scale=scale) + crs=projection, crsTransform=transform) return ee.Feature(None, reduce_mean).set( {"date": date_mean, 'station_name': param_dict['STATION_ID']}) @@ -99,7 +103,6 @@ def _summary_feature_col(ftr): output_stats = (ee.FeatureCollection(ic.map(_reduce_point_img)) .map(_summary_feature_col)) output_timeseries = output_stats.aggregate_array('output').getInfo() - column_names = ['date', 'station_name'] + bands output_df = pd.DataFrame(data=output_timeseries, columns=column_names) output_df.to_csv(param_dict['GRID_FILE_PATH'], index=False) @@ -108,7 +111,6 @@ def _summary_feature_col(ftr): print(f'\n{param_dict["GRID_FILE_PATH"]} downloaded in ' f'{execution_minutes:.2f} minutes.') - def download_grid_data(metadata_path, config_path, local_folder=None, force_download=False): """ diff --git a/gridwxcomp/example_data/gridwxcomp_config_agera5.ini b/gridwxcomp/example_data/gridwxcomp_config_agera5.ini index 5f84199..9bedeb1 100644 --- a/gridwxcomp/example_data/gridwxcomp_config_agera5.ini +++ b/gridwxcomp/example_data/gridwxcomp_config_agera5.ini @@ -67,6 +67,7 @@ station_rhmin_col = RHMin (%) station_rhavg_col = RHAvg (%) station_eto_col = ETo (mm) station_etr_col = ETr (mm) +station_prcp_col = Precip (mm) gridded_date_col = date gridded_tmax_col = Temperature_Air_2m_Max_24h @@ -80,6 +81,7 @@ gridded_rhmin_col = gridded_rhavg_col = Relative_Humidity_2m_15h gridded_eto_col = 
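+# Variables the collection does not provide are left blank; read_data in
+# util.py appends empty columns for them later to avoid key errors.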
gridded_etr_col = +gridded_prcp_col = Precipitation_Flux [UNITS] # For the parameters in this section, enter the corresponding units from the options commented above. @@ -107,3 +109,7 @@ gridded_rh_units = percent # inches, mm station_et_units = mm gridded_et_units = mm + +# inches, mm +station_prcp_units = mm +gridded_prcp_units = mm diff --git a/gridwxcomp/example_data/gridwxcomp_config_conus404.ini b/gridwxcomp/example_data/gridwxcomp_config_conus404.ini index b92b7de..773510f 100644 --- a/gridwxcomp/example_data/gridwxcomp_config_conus404.ini +++ b/gridwxcomp/example_data/gridwxcomp_config_conus404.ini @@ -66,6 +66,7 @@ station_rhmin_col = RHMin (%) station_rhavg_col = RHAvg (%) station_eto_col = ETo (mm) station_etr_col = ETr (mm) +station_prcp_col = Precip (mm) gridded_date_col = date gridded_tmax_col = T2_MAX @@ -79,6 +80,7 @@ gridded_rhmin_col = gridded_rhavg_col = gridded_eto_col = ETO_ASCE gridded_etr_col = ETR_ASCE +gridded_prcp_col = PREC_ACC_NC [UNITS] # For the parameters in this section, enter the corresponding units from the options commented above. @@ -106,3 +108,7 @@ gridded_rh_units = # inches, mm station_et_units = mm gridded_et_units = mm + +# inches, mm +station_prcp_units = mm +gridded_prcp_units = mm diff --git a/gridwxcomp/plot.py b/gridwxcomp/plot.py index dfcf31f..443f2eb 100644 --- a/gridwxcomp/plot.py +++ b/gridwxcomp/plot.py @@ -28,11 +28,12 @@ 'rhmin', 'rhavg', 'eto', - 'etr'] + 'etr', + 'prcp'] UNITS_DICT = {'tmax': '(C)', 'tmin': '(C)', 'tdew': '(C)', 'rs': '(w/m2)', 'wind': '(m/s)', 'ea': '(kpa)', 'rhmax': '(%)', 'rhmin': '(%)', 'rhavg': '(%)', - 'eto': '(mm)', 'etr': '(mm)'} + 'eto': '(mm)', 'etr': '(mm)', 'prcp': '(mm)'} # TITLES_LIST and MONTHLY_TITLES_LIST are formatted as LaTeX TITLES_LIST = [ @@ -46,7 +47,8 @@ '$$RH\\:Minimum$$', '$$RH\\:Average$$', '$$ET_{O}$$', - '$$ET_{r}$$'] + '$$ET_{r}$$', + '$$Precipitation$$'] MONTHLY_TITLES_LIST = [ '$$Maximum\\:Temperature\\:Monthly\\:Averages$$', @@ -59,7 +61,8 @@ '$$RH\\:Minimum\\:Monthly\\:Averages$$', '$$RH\\:Average\\:Monthly\\:Averages$$', '$$ET_{O}\\:Monthly\\:Averages$$', - '$$ET_{r}\\:Monthly\\:Averages$$'] + '$$ET_{r}\\:Monthly\\:Averages$$', + '$$Precipitation\\:Monthly\\:Averages$$'] # list of x (station), y (gridded) variables diff --git a/gridwxcomp/util.py b/gridwxcomp/util.py index e508dcd..2eaafdc 100644 --- a/gridwxcomp/util.py +++ b/gridwxcomp/util.py @@ -3,6 +3,7 @@ Utility functions or classes for ``gridwxcomp`` package """ import configparser as cp +import ee import numpy as np import os import pandas as pd @@ -11,6 +12,21 @@ import pyproj +def affine_transform(img): + """ + Get the affine transform of the image as an EE object + + Arguments: + img: ee.Image object + + Returns + ee.List object + + """ + return ee.List(ee.Dictionary( + ee.Algorithms.Describe(img.projection())).get('transform')) + + def parse_yr_filter(dt_df, years, label): """ Parse string year filter and apply it to datetime-indexed @@ -255,7 +271,7 @@ def read_data(config_dictionary, version, filepath): raw_file_data.set_index('date', drop=True, inplace=True) # iterate through an expected list of vars and append a column should one be missing, to prevent a key error later - var_list = ['tmax', 'tmin', 'tdew', 'rs', 'wind', 'rhmax', 'rhmin', 'rhavg', 'ea', 'eto', 'etr'] + var_list = ['tmax', 'tmin', 'tdew', 'rs', 'wind', 'rhmax', 'rhmin', 'rhavg', 'ea', 'eto', 'etr', 'prcp'] for var in var_list: var_col = version + var + '_col' @@ -297,8 +313,8 @@ def convert_units(config_dictionary, version, df): converted_df = 
df.copy(deep=True) # iterate through list of vars to convert each # todo make these lists into a dict, and allow for column order parameters in the config file instead of names - var_list = ['tmax', 'tmin', 'tdew', 'rs', 'wind', 'ea', 'rhmax', 'rhmin', 'rhavg', 'eto', 'etr'] - units_list = ['temp', 'temp', 'temp', 'solar', 'wind', 'ea', 'rh', 'rh', 'rh', 'et', 'et'] + var_list = ['tmax', 'tmin', 'tdew', 'rs', 'wind', 'ea', 'rhmax', 'rhmin', 'rhavg', 'eto', 'etr', 'prcp'] + units_list = ['temp', 'temp', 'temp', 'solar', 'wind', 'ea', 'rh', 'rh', 'rh', 'et', 'et', 'prcp'] for i in range(len(var_list)): var_col = version + var_list[i] + '_col' var_units_key = version + units_list[i] + '_units' @@ -378,7 +394,7 @@ def convert_units(config_dictionary, version, df): '\n\n\'{}\' was specified in the config file as having units \'{}\' which is not a valid option.' .format(var_units_key, config_dictionary[var_units_key])) - elif units_list[i] == 'et': + elif units_list[i] == 'et' or units_list[i] == 'prcp': if var_units == 'mm': converted_data = np.array(df[var_list[i]]) elif var_units == 'inches' or var_units == 'in': From 1e68f538122a17132f9e99626ef0f043a27d19df Mon Sep 17 00:00:00 2001 From: Christian Dunkerly <23328924+cwdunkerly@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:12:02 -0800 Subject: [PATCH 3/3] adjusted tests to work with ee high endpoint approach --- gridwxcomp/ee_download.py | 2 +- tests/test_gridwxcomp.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/gridwxcomp/ee_download.py b/gridwxcomp/ee_download.py index 3d108bd..00a91cb 100644 --- a/gridwxcomp/ee_download.py +++ b/gridwxcomp/ee_download.py @@ -9,7 +9,6 @@ """ import ee -import googleapiclient import os import pandas as pd import re @@ -111,6 +110,7 @@ def _summary_feature_col(ftr): print(f'\n{param_dict["GRID_FILE_PATH"]} downloaded in ' f'{execution_minutes:.2f} minutes.') + def download_grid_data(metadata_path, config_path, local_folder=None, force_download=False): """ diff --git a/tests/test_gridwxcomp.py b/tests/test_gridwxcomp.py index 80b988f..2323c1c 100644 --- a/tests/test_gridwxcomp.py +++ b/tests/test_gridwxcomp.py @@ -218,8 +218,6 @@ def test_download_grid_data_conus404(self, data): download_grid_data( data['prep_metadata_outpath'], config_path=data['conus404_config_path'], - export_bucket='openet', - export_path=self.export_path, local_folder='tests', force_download=False) @@ -248,8 +246,6 @@ def test_download_grid_data_conus404_overwrite(self, data): download_grid_data( data['prep_metadata_outpath'], config_path=data['conus404_config_path'], - export_bucket='openet', - export_path=self.export_path, local_folder='tests', force_download=True)
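
For reference, a minimal sketch of the download flow after this series.
Argument names follow the signatures introduced in PATCH 1; the metadata and
config paths below are placeholders based on the example data and tests.

import ee
from gridwxcomp.ee_download import download_grid_data

# The high-volume endpoint is built for many small concurrent requests,
# which matches the per-station getInfo calls issued by the thread pool.
ee.Authenticate()
ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')

# Metadata CSV previously written by gridwxcomp.prep_metadata
metadata_path = 'conus404_gridwxcomp_metadata.csv'
config_path = 'gridwxcomp/example_data/gridwxcomp_config_conus404.ini'

# Downloads one timeseries CSV per station into ./conus404/ (or under
# local_folder, if given) and records each file's absolute path in the
# GRID_FILE_PATH column of the metadata CSV.
download_grid_data(
    metadata_path,
    config_path=config_path,
    local_folder=None,
    force_download=False)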