diff --git a/environment.yml b/environment.yml index c0161fcb..5ead3f0c 100644 --- a/environment.yml +++ b/environment.yml @@ -1,50 +1,47 @@ -name: reeds2 +name: reeds channels: - - defaults - conda-forge dependencies: - - python=3.11 - - bokeh=3.2 - - click=8.0 - - git-lfs=2.13 + - python=3.14 + - bokeh=3.9 + - click=8.3 + - cmocean=4.0 + - geopandas=1.1 + - git-lfs=3.7 - gitpython=3.1 - - h5py=3.9 - - matplotlib=3.7 - - numpy=1.26 - - openpyxl=3.0 - - pandas=2.0 - - pip=23.2 - - pytables=3.8 - - pytest=7.4 - - requests=2.31 - - scikit-learn=1.2 - - scipy=1.11 - - tqdm=4.65 + - h5py=3.16 + - matplotlib=3.10 + - numpy=2.4 + - openpyxl=3.1 + - pandas=3.0 + - pip=26.1 + - pulp=2.8 + - pytables=3.11 + - pytest=9.0 + - requests=2.33 + - scikit-learn=1.8 + - scipy=1.17 + - shapely=2.1 + - tqdm=4.67 ## vvv The next packages are optional for default settings and may ## vvv be excluded if you're having trouble building the environment. - - ipykernel=6.25 # for interactive python in VS Code - - ipywidgets=8.0 # for jupyter notebooks - - mapclassify=2.5 # more mapping tools + - fiona=1.10 # for interactive maps + - folium=0.20 # for interactive maps + - ipykernel=7.2 # for interactive python in VS Code + - ipywidgets=8.1 # for jupyter notebooks + - mapclassify=2.10 # more mapping tools - mscorefonts=0.0 # extra fonts for plotting (only relevant on linux/HPC) - - networkx=3.1 # for uncommonly-used network analysis postprocessing - - notebook=6.5 # for jupyter notebooks + - myst-parser=5.0 # for building documentation + - networkx=3.6 # for uncommonly-used network analysis postprocessing + - notebook=7.5 # for jupyter notebooks + - python-pptx=1.0 # for postprocessing/compare_cases.py + - sphinx=9.1 # for building documentation + - sphinx_rtd_theme=3.1 # for documentation + - sphinxcontrib-bibtex=2.6 # for documentation ## ^^^ - pip: - - cmocean==3.0.3 - - gdxpds==1.4.0 - - geopandas==0.14.0 - - pulp==2.7.0 - - shapely==2.0.1 + - gamsapi[transfer]==53.5.0 + - gdxpds==4.0.0 ## vvv The next packages are optional for default settings and may ## vvv be excluded if they cause trouble building the environment. - - fiona==1.9.5 # for interactive maps - - folium==0.14.0 # for interactive maps - - gamspy_base==50.5.0 # for tests on the github runner - - myst-parser==2.0.0 # for building documentation - - proj==0.2.0 # for plotting maps - - pyproj==3.6.1 # for plotting maps - - python-pptx==0.6.22 # for postprocessing/compare_cases.py - - sphinx_rtd_theme==2.0.0 # for documentation - - sphinx==7.2.6 # for building documentation - - sphinxcontrib-bibtex==2.6.2 # for documentation - ## ^^^ + - gamspy_base==53.5.0 # for tests on the github runner diff --git a/postprocessing/raw_value_streams.py b/postprocessing/raw_value_streams.py index 7ed4b033..d1ade6ae 100644 --- a/postprocessing/raw_value_streams.py +++ b/postprocessing/raw_value_streams.py @@ -151,7 +151,7 @@ def get_df_jacobian(jacobian_file, var_list=None, con_list=None): coeff (float): Coefficient of the variable in the constraint or objective. ''' start = datetime.now() - df_A = gdxpds.to_dataframe(jacobian_file, 'A', old_interface=False) + df_A = gdxpds.to_dataframe(jacobian_file, 'A') for x in ['j','i']: #For i (equation) and j (variable) sets, I need to dump to csv to get the Text column, ugh x_file = jacobian_file.replace('.gdx',f'_{x}.csv') diff --git a/postprocessing/reValue/reValue.py b/postprocessing/reValue/reValue.py index 064376b2..7dfac90d 100644 --- a/postprocessing/reValue/reValue.py +++ b/postprocessing/reValue/reValue.py @@ -85,7 +85,7 @@ def get_prices(): yrs_less = [y for y in yrs if y < year] max_yr = max(yrs_less) df_ra = gdxpds.to_dataframe(f'{reeds_run_path}/handoff/reeds_data/ccdata_{max_yr}.gdx', - 'net_load_2012', old_interface=False) + 'net_load_2012') if int(df_ra['t'][0]) != year: raise ValueError(f'RA year ({int(df_ra["t"][0])}) does not match current scenario year ({year})') df_ra = df_ra.sort_values('Value', ascending=False) diff --git a/postprocessing/retail_rate_module/calculate_historical_capex.py b/postprocessing/retail_rate_module/calculate_historical_capex.py index 9b8e4df7..7f4a2ada 100644 --- a/postprocessing/retail_rate_module/calculate_historical_capex.py +++ b/postprocessing/retail_rate_module/calculate_historical_capex.py @@ -52,13 +52,11 @@ def get_earliest_cap_costs(inputs_case): cost_cap = gdxpds.to_dataframe( os.path.join(inputs_case, 'inputs.gdx'), 'cost_cap', - old_interface=False ) cost_cap.i = cost_cap.i.str.lower() cost_cap_energy = gdxpds.to_dataframe( os.path.join(inputs_case, 'inputs.gdx'), 'cost_cap_energy', - old_interface=False ) cost_cap_energy.i = cost_cap_energy.i.str.lower() cost_cap = ( @@ -79,7 +77,6 @@ def get_earliest_cap_costs(inputs_case): cost_cap_mult = gdxpds.to_dataframe( os.path.join(inputs_case, 'inputs.gdx'), 'cost_cap_fin_mult_out', - old_interface=False ) cost_cap_mult.i = cost_cap_mult.i.str.lower() cost_cap_mult['t'] = cost_cap_mult['t'].astype(int) @@ -109,7 +106,6 @@ def get_earliest_cap_costs(inputs_case): rsc_dat = gdxpds.to_dataframe( os.path.join(inputs_case, 'inputs.gdx'), 'rsc_dat', - old_interface=False ) rsc_dat.i = rsc_dat.i.str.lower() cost_cap_rsc = ( diff --git a/reeds/input_processing/copy_files.py b/reeds/input_processing/copy_files.py index 5a74bdfa..aa81bebe 100644 --- a/reeds/input_processing/copy_files.py +++ b/reeds/input_processing/copy_files.py @@ -181,7 +181,7 @@ def get_regions_and_agglevel( reeds_path, inputs_case, save_regions_and_agglevel=True, - overwrite=False, + overwrite=True, ): """ Create a regional mapping to help filter for specific regions and aggregation levels. @@ -338,7 +338,7 @@ def get_regions_and_agglevel( # Sort hier_sub by r so that "ord(r)" commands in GAMS result in the properly # ordered outputs - hier_sub['numeric_value'] = hier_sub['r'].str.extract('(\d+)').astype(float) + hier_sub['numeric_value'] = hier_sub['r'].str.extract(r'(\d+)').astype(float) hier_sub = hier_sub.sort_values(by='numeric_value').drop('numeric_value', axis=1) # Output the itlgrp files for mixed and county resolution @@ -481,12 +481,7 @@ def read_banned_tech_file(full_path, filepath, inputs_case, r_county): .apply(list) .apply(sorted) ) - r_all_counties_map = ( - r_county.groupby('r') - ['county'] - .apply(list) - .apply(sorted) - ) + r_all_counties_map = r_county.county.tolist() county_ban_regions = list( r_ban_counties_map .loc[(r_ban_counties_map.isin(r_all_counties_map))] @@ -835,7 +830,7 @@ def write_scalars(scalars, inputs_case): scalars_write = pd.concat([scalars, toadd], axis=0) # Trim trailing decimal zeros - scalars_write.value = scalars_write.value.astype(str).replace('\.0+$', '', regex=True) + scalars_write.value = scalars_write.value.astype(str).replace(r'\.0+$', '', regex=True) scalars_write.to_csv(os.path.join(inputs_case, 'scalars.csv'), header=False) # Rewrite the scalar tables as GAMS-readable definition @@ -1578,7 +1573,7 @@ def main(reeds_path, inputs_case): # #%% Settings for testing ### # reeds_path = reeds.io.reeds_path - # inputs_case = os.path.join(reeds_path,'runs','v20260522_transcostM0_OR_water','inputs_case') + # inputs_case = os.path.join(reeds_path,'runs','v20260605_envM0_Pacific','inputs_case') # ---- Set up logger ---- diff --git a/reeds/input_processing/h5_to_gdx.py b/reeds/input_processing/h5_to_gdx.py index bed3e258..78f0c0b6 100644 --- a/reeds/input_processing/h5_to_gdx.py +++ b/reeds/input_processing/h5_to_gdx.py @@ -49,7 +49,7 @@ def read_inputs(case:str|Path) -> tuple: except KeyError: df = pd.DataFrame(columns=columns) for col in df: - if df[col].dtype == 'O': + if df[col].dtype in ['str', 'O']: df[col] = df[col].str.decode('utf-8') dictin[key] = df return dictin, gamstypes, comments diff --git a/reeds/io.py b/reeds/io.py index 4c13b521..339fb225 100644 --- a/reeds/io.py +++ b/reeds/io.py @@ -205,7 +205,8 @@ def get_zone_nodes(case=None, crs='ESRI:102008', **kwargs): def get_zones( case=None, crs='ESRI:102008', - exclude_water_areas=True, + tolerance:float=100, + exclude_water_areas:bool=True, **kwargs, ) -> gpd.GeoDataFrame: """ @@ -213,6 +214,8 @@ def get_zones( case (str, Path, or None): Path to a ReEDS case. If None, uses the default GSw_ZoneSet from cases.csv. crs (str): Coordinate reference system + tolerance (float) [m]: Degree of simplification of aggregated geometries + (passed to gpd.GeoSeries.simplify_coverage()) **kwargs: ReEDS switch:value pairs (overrides case argument) """ dfcounty = reeds.spatial.get_map('county', source='tiger', crs=crs) @@ -225,6 +228,9 @@ def get_zones( country = dfstates.dissolve().geometry[0] dfzones.geometry = dfzones.intersection(country).buffer(0) + if tolerance: + dfzones.geometry = dfzones.geometry.simplify_coverage(tolerance=tolerance) + return dfzones[['geometry']] @@ -397,7 +403,7 @@ def read_h5(h5path:str|Path, key:str) -> pd.DataFrame: except KeyError: df = pd.DataFrame(columns=columns) for col in df: - if df[col].dtype == 'O': + if df[col].dtype in ['str', 'O']: df[col] = df[col].str.decode('utf-8') return df @@ -674,23 +680,24 @@ def get_switches(case=None, **kwargs): (case if case is not None else reeds_path), 'reeds', 'resource_adequacy', 'ra_switches.csv', ) - asw = pd.read_csv(fpath_asw, index_col='key') - for i, row in asw.iterrows(): + dfra = pd.read_csv(fpath_asw, index_col='key', dtype='object') + ra_switches = {} + for key, row in dfra.iterrows(): if row['dtype'] == 'list': - row.value = row.value.split(',') + ra_switches[key] = row.value.split(',') try: - row.value = [int(i) for i in row.value] + ra_switches[key] = [int(i) for i in row.value] except ValueError: pass elif row['dtype'] == 'boolean': - row.value = False if row.value.lower() == 'false' else True + ra_switches[key] = False if row.value.lower() == 'false' else True elif row['dtype'] == 'str': - row.value = str(row.value) + ra_switches[key] = str(row.value) elif row['dtype'] == 'int': - row.value = int(row.value) + ra_switches[key] = int(row.value) elif row['dtype'] == 'float': - row.value = float(row.value) - sw = pd.concat([sw, asw.value]) + ra_switches[key] = float(row.value) + sw = pd.concat([sw, pd.Series(ra_switches)]) except FileNotFoundError: print(f"{fpath_asw} not found so leaving out resource adequacy switches") ### Add derivative switches @@ -758,7 +765,7 @@ def get_scalars(case=None, full=False): return scalars -def read_h5py_file(filename, decode_strings=False): +def read_h5py_file(filename): """Return dataframe object for a h5py file. This function returns a pandas dataframe of a h5py file. If the file has multiple dataset on it @@ -806,7 +813,7 @@ def read_h5py_file(filename, decode_strings=False): idx_cols.sort() for idx_col in idx_cols: df[idx_col] = pd.Series(f[idx_col]).values - if str(df[idx_col].dtype).startswith('|S') and decode_strings: + if str(df[idx_col].dtype).startswith('|S'): df[idx_col] = df[idx_col].str.decode('utf-8') df = df.set_index(idx_cols) @@ -827,7 +834,7 @@ def read_h5py_file(filename, decode_strings=False): return df -def read_file(filename, parse_timestamps=False, decode_strings=False): +def read_file(filename, parse_timestamps=False): """Return dataframe object of input file for multiple file formats. This function read multiple file formats for h5 file sand returns a dataframe from the file. @@ -857,7 +864,7 @@ def read_file(filename, parse_timestamps=False, decode_strings=False): # datasets that composes the h5 file. For a single dataset we use pandas (since it is the most # convenient) and h5py for the custom h5 file. try: - df = read_h5py_file(filename, decode_strings=decode_strings) + df = read_h5py_file(filename) except TypeError: df = pd.read_hdf(filename) @@ -1661,7 +1668,7 @@ def write_to_h5( key, filepath, attrs={}, - overwrite=False, + overwrite=True, compression='gzip', compression_opts=4, **kwargs, @@ -1671,6 +1678,7 @@ def write_to_h5( if key in list(f): if overwrite: del f[key] + print(f'{key} was already used in {filepath} but is being overwritten') else: raise ValueError(f'{key} is already used in {filepath}') @@ -1694,7 +1702,7 @@ def write_to_h5( data = dfwrite[col] dtype = ( f"S{data.str.len().max()}" - if dfwrite.dtypes[col] == 'O' + if dfwrite.dtypes[col] in ['str', 'O'] else dfwrite.dtypes[col] ) @@ -1810,7 +1818,7 @@ def write_csv_to_inputs_h5( df.columns = df.loc[0].str.replace('*','').values df = df.drop(0) ## No other *'s are allowed - if df.applymap(lambda x: '*' in x).any().any(): + if df.map(lambda x: '*' in x).any().any(): err = ( "'*' characters are only allowed in subset headers.\n" f"{filepath} has at least one disallowed '*' character." @@ -1902,7 +1910,7 @@ def write_profile_to_h5(df, filename, outfolder, compression_opts=4): indexvals.to_series().apply(datetime.datetime.isoformat).reset_index(drop=True) ) f.create_dataset(f'index_{i}', data=timeindex.str.encode('utf-8'), dtype='S30') - elif indexvals.dtype == 'O': + elif indexvals.dtype in ['str', 'O']: f.create_dataset(f'index_{i}', data=indexvals, dtype=f'S{indexvals.map(len).max()}') else: # Other indices can be saved using their data type diff --git a/reeds/resource_adequacy/prep_data.py b/reeds/resource_adequacy/prep_data.py index eebb2f0a..4aa23b1f 100644 --- a/reeds/resource_adequacy/prep_data.py +++ b/reeds/resource_adequacy/prep_data.py @@ -170,7 +170,7 @@ def main(t, casedir, iteration=0): ## e.g. "upv_5" -> "upv", "csp2_3" -> "csp" techs_vre_simplify = dict(zip( techs_vre, - [re.sub('\d?_\d+$', '', i) for i in techs_vre] + [re.sub(r'\d?_\d+$', '', i) for i in techs_vre] )) try: diff --git a/runreeds.py b/runreeds.py index 1cbfc517..91bd69f4 100644 --- a/runreeds.py +++ b/runreeds.py @@ -832,22 +832,19 @@ def setupEnvironment( #%% Check whether the ReEDS conda environment is activated if (not skip_checks) and ( - ('reeds2' not in os.environ['CONDA_DEFAULT_ENV'].lower()) - or (not pd.__version__.startswith('2')) + ('reeds' not in os.environ['CONDA_DEFAULT_ENV'].lower()) + or (not pd.__version__.startswith('3')) ): - print( + err = ( f"Your environment is {os.environ['CONDA_DEFAULT_ENV']} and your pandas " - f"version is {pd.__version__}.\nThe default environment is 'reeds2', with\n" - "pandas version 2.x, so the python parts of ReEDS are unlikely to work.\n" + f"version is {pd.__version__}.\nThe supported environment is 'reeds', with\n" + "pandas version 3.x.\n" "To build the environment for the first time, run:\n" " `conda env create -f environment.yml`\n" "To activate the created environment, run:\n" - " `conda activate reeds2` (or `activate reeds2` on Windows)\n" - "Do you want to continue without activating the environment?" + " `conda activate reeds` (or `activate reeds` on Windows)" ) - confirm_env = str(input("Continue? y/[n]: ") or 'n') - if confirm_env not in ['y','Y','yes','Yes','YES']: - quit() + raise ValueError(err) #%% Load specified case file, infer other settings from cases.csv if cases_suffix in ['', 'default']: