From a6d84473325f5318dd9ccb6c1fde7466e263007f Mon Sep 17 00:00:00 2001 From: Sam Stewart Date: Wed, 13 May 2026 19:46:44 +0100 Subject: [PATCH] feat: added support for Azure sensitive column and fixed sanitization --- cur_sanitize.py | 34 ++- docs/src/howto/config_modules.md | 4 +- docs/src/modules.md | 25 +- .../com/digitalpebble/spruce/AzureColumn.java | 4 +- .../com/digitalpebble/spruce/modules/PUE.java | 26 ++- .../digitalpebble/spruce/modules/Water.java | 25 +- .../spruce/modules/ccf/azure/Storage.java | 214 ++++++++++++++++++ .../modules/ccf/azure/package-info.java | 7 + src/main/resources/azure-pue-wue.csv | 78 +++++++ src/main/resources/ccf/azure-storage.json | 64 ++++++ src/main/resources/default-config-azure.json | 23 ++ .../com/digitalpebble/spruce/ConfigTest.java | 9 +- .../digitalpebble/spruce/modules/PUETest.java | 41 +++- .../{WaterTest.java => WaterAWSTest.java} | 3 +- .../spruce/modules/WaterAzureTest.java | 132 +++++++++++ .../spruce/modules/ccf/azure/StorageTest.java | 106 +++++++++ 16 files changed, 775 insertions(+), 20 deletions(-) create mode 100644 src/main/java/com/digitalpebble/spruce/modules/ccf/azure/Storage.java create mode 100644 src/main/java/com/digitalpebble/spruce/modules/ccf/azure/package-info.java create mode 100644 src/main/resources/azure-pue-wue.csv create mode 100644 src/main/resources/ccf/azure-storage.json rename src/test/java/com/digitalpebble/spruce/modules/{WaterTest.java => WaterAWSTest.java} (99%) create mode 100644 src/test/java/com/digitalpebble/spruce/modules/WaterAzureTest.java create mode 100644 src/test/java/com/digitalpebble/spruce/modules/ccf/azure/StorageTest.java diff --git a/cur_sanitize.py b/cur_sanitize.py index 2c1729a..a77d438 100644 --- a/cur_sanitize.py +++ b/cur_sanitize.py @@ -16,6 +16,34 @@ "resource_tags", "savings_plan_savings_plan_a_r_n", "reservation_reservation_a_r_n", + #Azure + "AccountOwnerId", + "AccountName", + "SubscriptionId", + "SubscriptionName", + "ResourceGroup", + "ResourceId", + "ResourceName", + "InstanceId", + "Tags", + "AdditionalInfo", + "ServiceInfo1", + "ServiceInfo2", + "BillingAccountId", + "BillingAccountName", + "BillingProfileId", + "BillingProfileName", + "InvoiceSectionId", + "InvoiceSectionName", + "CostCenter", + "ReservationId", + "ReservationName", + "ProductOrderId", + "ProductOrderName", + "CostAllocationRuleName", + "benefitId", + "benefitName" + ] # Prefix patterns — any column starting with these will be dropped @@ -35,9 +63,13 @@ def get_sensitive_columns(con, table_name: str) -> list[str]: ] to_drop = set() + #Fall back for any case inconsitencies + exact_lower = set(c.lower() for c in SENSITIVE_COLUMNS_EXACT) for col in all_columns: - if col in SENSITIVE_COLUMNS_EXACT: + if col in SENSITIVE_COLUMNS_EXACT : + to_drop.add(col) + elif col.lower() in exact_lower: to_drop.add(col) elif any(col.startswith(prefix) for prefix in SENSITIVE_PREFIXES): to_drop.add(col) diff --git a/docs/src/howto/config_modules.md b/docs/src/howto/config_modules.md index 2bdb581..3a3e845 100644 --- a/docs/src/howto/config_modules.md +++ b/docs/src/howto/config_modules.md @@ -15,8 +15,8 @@ The AWS default looks like this: {{#include ../../../src/main/resources/default-config-aws.json}} ``` -The Azure default is a smaller pipeline — it covers networking energy and carbon intensity, -but does not yet include compute (Boavizta), PUE, Water, or OperationalEmissions modules: +The Azure pipeline is the current focus of our work, with basic services +currently covered or about to be: ```json {{#include ../../../src/main/resources/default-config-azure.json}} diff --git a/docs/src/modules.md b/docs/src/modules.md index d20a2de..deec9b4 100644 --- a/docs/src/modules.md +++ b/docs/src/modules.md @@ -32,6 +32,23 @@ The HDD and SSD coefficients (in Wh per TB-hour) can be overridden via configura **Output column**: `operational_energy_kwh`. +### ccf.azure.Storage + +Provides an estimate of energy used for Azure storage by applying the same +Cloud Carbon Footprint storage coefficients used by `ccf.aws.Storage`. +Service-specific replication factors are applied. Managed disks are estimated +from their provisioned capacity. + +The HDD and SSD coefficients (in Wh per TB-hour) can be overridden via +configuration using the same keys as `ccf.aws.Storage`: + +| Key | Default | Description | +|---|---|---| +| `hdd_coefficient_tb_h` | 0.65 | Energy per TB-hour for HDD storage | +| `ssd_coefficient_tb_h` | 1.2 | Energy per TB-hour for SSD storage | + +**Output column**: `operational_energy_kwh`. + ### ccf.aws.Accelerators Provides an estimate of energy used by accelerators, following the approach used by the [Cloud Carbon Footprint](https://www.cloudcarbonfootprint.org/) project. @@ -109,13 +126,19 @@ Extracts the region information from the input and stores it in a standard locat Uses the 2024 data published by AWS for [Power Usage Effectiveness](https://sustainability.aboutamazon.com/aws-wue-pue.csv) to rows for which energy usage has been estimated. This provides a more accurate and up to date approach than the flat rate approach in the [CCF methodology](https://www.cloudcarbonfootprint.org/docs/methodology/#pue). +The source for Azure is https://datacenters.microsoft.com/sustainability/efficiency/. + +The PUE module supports both AWS and Azure providers and loads the appropriate resource file based on the provider: +- AWS: `aws-pue-wue.csv` +- Azure: `azure-pue-wue.csv` + **Output column**: `power_usage_effectiveness`. ## Water Estimates water consumption associated with cloud usage, producing three columns: -* **`water_cooling_l`** – the volume of water (in litres) used for **data centre cooling**. Computed as `operational_energy_kwh` × `power_usage_effectiveness` × WUE, where WUE (Water Usage Effectiveness) is the ratio of litres of water consumed for cooling per kWh of IT energy. The per-region WUE values come from the [2024 data published by AWS](https://sustainability.aboutamazon.com/aws-wue-pue.csv). +* **`water_cooling_l`** – the volume of water (in litres) used for **data centre cooling**. Computed as `operational_energy_kwh` × `power_usage_effectiveness` × WUE, where WUE (Water Usage Effectiveness) is the ratio of litres of water consumed for cooling per kWh of IT energy. The per-region WUE values come from the [2024 data published by AWS](https://sustainability.aboutamazon.com/aws-wue-pue.csv). The source for Azure is https://datacenters.microsoft.com/sustainability/efficiency/. * **`water_electricity_production_l`** – the volume of water (in litres) consumed during **electricity generation** to power the data centre. Computed as `operational_energy_kwh` × `power_usage_effectiveness` × WCF, where WCF (Water Consumption Factor) represents the litres of water consumed per kWh of electricity generated. The WCF values per electricity grid zone are sourced from the [WRI methodology for calculating water use embedded in purchased electricity](https://www.wri.org/data/dataset-guidance-calculating-water-use-embedded-purchased-electricity). diff --git a/src/main/java/com/digitalpebble/spruce/AzureColumn.java b/src/main/java/com/digitalpebble/spruce/AzureColumn.java index fa01c85..29899a5 100644 --- a/src/main/java/com/digitalpebble/spruce/AzureColumn.java +++ b/src/main/java/com/digitalpebble/spruce/AzureColumn.java @@ -14,9 +14,11 @@ public class AzureColumn extends NativeColumn { public static AzureColumn RESOURCE_LOCATION = new AzureColumn("ResourceLocation", StringType); public static AzureColumn METER_CATEGORY = new AzureColumn("MeterCategory", StringType); public static AzureColumn METER_SUBCATEGORY = new AzureColumn("MeterSubCategory", StringType); + public static AzureColumn METER_NAME = new AzureColumn("MeterName", StringType); + public static AzureColumn UNIT_OF_MEASURE = new AzureColumn("UnitOfMeasure", StringType); public static AzureColumn QUANTITY = new AzureColumn("Quantity", DoubleType); AzureColumn(String l, DataType t) { super(l, t); } -} \ No newline at end of file +} diff --git a/src/main/java/com/digitalpebble/spruce/modules/PUE.java b/src/main/java/com/digitalpebble/spruce/modules/PUE.java index 25a500c..297a592 100644 --- a/src/main/java/com/digitalpebble/spruce/modules/PUE.java +++ b/src/main/java/com/digitalpebble/spruce/modules/PUE.java @@ -4,6 +4,7 @@ import com.digitalpebble.spruce.Column; import com.digitalpebble.spruce.EnrichmentModule; +import com.digitalpebble.spruce.Provider; import com.digitalpebble.spruce.SpruceColumn; import com.digitalpebble.spruce.Utils; import org.apache.spark.sql.Row; @@ -20,7 +21,7 @@ * Enrichment module that applies a Power Usage Effectiveness (PUE) factor. *

* It attempts to determine the PUE based on the region code ({@link com.digitalpebble.spruce.SpruceColumn#REGION}) - * by looking up values in a CSV resource file ({@code aws-pue-wue.csv}). + * by looking up values in a CSV resource file ({@code aws-pue-wue.csv} or {@code azure-pue-wue.csv}). *

* The lookup logic follows this priority: *

    @@ -32,14 +33,33 @@ public class PUE implements EnrichmentModule { private double defaultPueValue = 1.15; - private static final String CSV_RESOURCE_PATH = "aws-pue-wue.csv"; + private static final String DEFAULT_CSV_RESOURCE_PATH = "aws-pue-wue.csv"; private final Map exactMatches = new HashMap<>(); private final Map regexMatches = new HashMap<>(); @Override public void init(Map params) { - List rows = Utils.loadCSV(CSV_RESOURCE_PATH); + init(params, Provider.AWS); + } + + @Override + public void init(Map params, Provider provider) { + String csvResourcePath = DEFAULT_CSV_RESOURCE_PATH; + + if (provider != null) { + switch (provider) { + case AZURE: + csvResourcePath = "azure-pue-wue.csv"; + break; + case AWS: + default: + csvResourcePath = "aws-pue-wue.csv"; + break; + } + } + + List rows = Utils.loadCSV(csvResourcePath); for (String[] parts : rows) { if (parts.length >= 3) { diff --git a/src/main/java/com/digitalpebble/spruce/modules/Water.java b/src/main/java/com/digitalpebble/spruce/modules/Water.java index 7c5cb7e..b6cd41c 100644 --- a/src/main/java/com/digitalpebble/spruce/modules/Water.java +++ b/src/main/java/com/digitalpebble/spruce/modules/Water.java @@ -39,7 +39,8 @@ public class Water implements EnrichmentModule { private static final Logger log = LoggerFactory.getLogger(Water.class); - private static final String WUE_CSV = "aws-pue-wue.csv"; + private static final String AWS_WUE_CSV = "aws-pue-wue.csv"; + private static final String AZURE_WUE_CSV = "azure-pue-wue.csv"; /** Minimum Aqueduct water stress category to qualify as "under stress". */ static final int HIGH_STRESS_THRESHOLD = 3; @@ -55,13 +56,23 @@ public class Water implements EnrichmentModule { @Override public void init(Map params, Provider provider) { this.provider = provider; - init(params); - } - @Override - public void init(Map params) { + String csvResourcePath = AWS_WUE_CSV; + + if (provider != null) { + switch (provider) { + case AZURE: + csvResourcePath = AZURE_WUE_CSV; + break; + case AWS: + default: + csvResourcePath = AWS_WUE_CSV; + break; + } + } + // Load WUE values from column index 3 of the PUE-WUE CSV - List pueWueRows = Utils.loadCSV(WUE_CSV); + List pueWueRows = Utils.loadCSV(csvResourcePath); for (String[] parts : pueWueRows) { if (parts.length >= 4) { String key = parts[1].trim(); @@ -75,7 +86,7 @@ public void init(Map params) { wueExactMatches.put(key, wue); } } catch (NumberFormatException e) { - log.warn("Invalid WUE value in {} for key: {}", WUE_CSV, key); + log.warn("Invalid WUE value in {} for key: {}", csvResourcePath, key); } } } diff --git a/src/main/java/com/digitalpebble/spruce/modules/ccf/azure/Storage.java b/src/main/java/com/digitalpebble/spruce/modules/ccf/azure/Storage.java new file mode 100644 index 0000000..4c551a9 --- /dev/null +++ b/src/main/java/com/digitalpebble/spruce/modules/ccf/azure/Storage.java @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: Apache-2.0 + +package com.digitalpebble.spruce.modules.ccf.azure; + +import com.digitalpebble.spruce.Column; +import com.digitalpebble.spruce.EnrichmentModule; +import com.digitalpebble.spruce.Utils; +import org.apache.spark.sql.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static com.digitalpebble.spruce.AzureColumn.*; +import static com.digitalpebble.spruce.SpruceColumn.ENERGY_USED; + +/** + * Provides an estimate of energy used for Azure storage capacity meters. + * + * @see CCF methodology + **/ +public class Storage implements EnrichmentModule { + + private static final Logger LOG = LoggerFactory.getLogger(Storage.class); + + private static final Pattern MANAGED_DISK_PATTERN = Pattern.compile("\\b([PES]\\d{1,2})\\b"); + private static final double HOURS_PER_AZURE_PRICING_MONTH = 30d * 24d; + private static final String MANAGED_DISK_REPLICATION_FACTOR = "STORAGE_DISKS"; + + List dataStoredUsageTypes; + Map storageUsageUnitMultipliers; + Map managedDiskUsageUnitMultipliers; + Map replicationFactors; + Map managedDisks; + + // 0.65 Watt-Hours per Terabyte-Hour for HDD + double hdd_gb_coefficient = 0.65 / 1024d; + // 1.2 Watt-Hours per Terabyte-Hour for SSD + double ssd_gb_coefficient = 1.2 / 1024d; + + @Override + @SuppressWarnings("unchecked") + public void init(Map params) { + Double coef = (Double) params.get("hdd_coefficient_tb_h"); + if (coef != null) { + hdd_gb_coefficient = coef / 1024d; + } + coef = (Double) params.get("ssd_coefficient_tb_h"); + if (coef != null) { + ssd_gb_coefficient = coef / 1024d; + } + + LOG.info("hdd_gb_coefficient: {}", hdd_gb_coefficient); + LOG.info("ssd_gb_coefficient: {}", ssd_gb_coefficient); + + try { + Map map = Utils.loadJSONResources("ccf/azure-storage.json"); + dataStoredUsageTypes = (List) map.get("DATA_STORED_USAGE_TYPES"); + storageUsageUnitMultipliers = (Map) map.get("STORAGE_USAGE_UNITS"); + managedDiskUsageUnitMultipliers = (Map) map.get("MANAGED_DISK_USAGE_UNITS"); + replicationFactors = (Map) map.get("REPLICATION_FACTORS"); + managedDisks = loadManagedDisks((Map) map.get("MANAGED_DISKS")); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public Column[] columnsNeeded() { + return new Column[]{METER_CATEGORY, METER_SUBCATEGORY, METER_NAME, UNIT_OF_MEASURE, QUANTITY}; + } + + @Override + public Column[] columnsAdded() { + return new Column[]{ENERGY_USED}; + } + + @Override + public void enrich(Row row, Map enrichedValues) { + String meterCategory = METER_CATEGORY.getString(row); + if (!"Storage".equals(meterCategory)) { + return; + } + + String meterName = METER_NAME.getString(row); + if (meterName == null) { + return; + } + if (QUANTITY.isNullAt(row)) { + return; + } + + String meterSubCategory = METER_SUBCATEGORY.getString(row); + String unit = UNIT_OF_MEASURE.getString(row); + double quantity = QUANTITY.getDouble(row); + boolean isHDD = false; + double gbHours; + int replication; + + // Generic storage capacity meters already report GB-month or TB-month usage. + // Managed Disks report a fraction of a provisioned disk-month, so resolve the + // disk SKU first and use its provisioned capacity rather than used disk space. + if (containsAny(dataStoredUsageTypes, meterName)) { + double gbMonths = getGbMonths(quantity, unit); + if (Double.isNaN(gbMonths)) { + LOG.debug("Storage unit not found for {} {}", meterName, unit); + return; + } + gbHours = Utils.Conversions.GBMonthsToGBHours(gbMonths); + replication = getReplicationFactor(meterName); + } else { + ManagedDisk managedDisk = getManagedDisk(meterName, meterSubCategory); + if (managedDisk == null || isDiskOperation(meterName)) { + return; + } + gbHours = getManagedDiskGbHours(quantity, unit, managedDisk.sizeGb()); + isHDD = managedDisk.hdd(); + replication = replicationFactors.get(MANAGED_DISK_REPLICATION_FACTOR); + } + + if (Double.isNaN(gbHours)) { + LOG.debug("Storage unit not found for {} {}", meterName, unit); + return; + } + + computeEnergy(gbHours, isHDD, replication, enrichedValues); + } + + double getGbMonths(double quantity, String unit) { + Integer multiplier = storageUsageUnitMultipliers.get(unit); + if (multiplier != null) { + return quantity * multiplier; + } + return Double.NaN; + } + + double getManagedDiskGbHours(double quantity, String unit, int diskSizeGb) { + Integer multiplier = managedDiskUsageUnitMultipliers.get(unit); + if (multiplier != null) { + return quantity * multiplier * diskSizeGb * HOURS_PER_AZURE_PRICING_MONTH; + } + return Double.NaN; + } + + ManagedDisk getManagedDisk(String meterName, String meterSubCategory) { + if (meterSubCategory == null || !meterSubCategory.contains("Managed Disks")) { + return null; + } + Matcher matcher = MANAGED_DISK_PATTERN.matcher(meterName); + if (!matcher.find()) { + return null; + } + return managedDisks.get(matcher.group(1)); + } + + boolean isDiskOperation(String meterName) { + return meterName.contains("Operations"); + } + + int getReplicationFactor(String meterName) { + if (meterName == null) { + return replicationFactors.get("DEFAULT"); + } + if (meterName.contains("GZRS")) { + return replicationFactors.get("STORAGE_GZRS"); + } + if (meterName.contains("LRS") || meterName.contains("ZRS")) { + return meterName.contains("LRS") ? + replicationFactors.get("STORAGE_LRS") : replicationFactors.get("STORAGE_ZRS"); + } + if (meterName.contains("RA-GRS") || meterName.contains("GRS")) { + return replicationFactors.get("STORAGE_GRS"); + } + return replicationFactors.get("DEFAULT"); + } + + private void computeEnergy(double gbHours, boolean isHDD, int replication, Map enrichedValues) { + double coefficient = isHDD ? hdd_gb_coefficient : ssd_gb_coefficient; + double energyKwh = gbHours / 1000 * coefficient * replication; + enrichedValues.put(ENERGY_USED, energyKwh); + } + + @SuppressWarnings("unchecked") + private Map loadManagedDisks(Map disks) { + Map loaded = new HashMap<>(); + for (Map.Entry entry : disks.entrySet()) { + Map values = (Map) entry.getValue(); + Number capacityGb = (Number) values.get("capacity_gb"); + String storageType = (String) values.get("storage_type"); + loaded.put(entry.getKey(), new ManagedDisk( + capacityGb.intValue(), + "HDD".equals(storageType) + )); + } + return loaded; + } + + record ManagedDisk(int sizeGb, boolean hdd) { + } + + private static boolean containsAny(List values, String candidate) { + for (String value : values) { + if (candidate.contains(value)) { + return true; + } + } + return false; + } +} diff --git a/src/main/java/com/digitalpebble/spruce/modules/ccf/azure/package-info.java b/src/main/java/com/digitalpebble/spruce/modules/ccf/azure/package-info.java new file mode 100644 index 0000000..3591fca --- /dev/null +++ b/src/main/java/com/digitalpebble/spruce/modules/ccf/azure/package-info.java @@ -0,0 +1,7 @@ +/** + * Azure-specific implementations of the Cloud Carbon Footprint enrichment + * modules. + * + * @see CCF methodology + */ +package com.digitalpebble.spruce.modules.ccf.azure; diff --git a/src/main/resources/azure-pue-wue.csv b/src/main/resources/azure-pue-wue.csv new file mode 100644 index 0000000..f4eeacf --- /dev/null +++ b/src/main/resources/azure-pue-wue.csv @@ -0,0 +1,78 @@ +# https://datacenters.microsoft.com/sustainability/efficiency/ +# Name,RegionID,PUE,WUE +Australia Central (Canberra),australiacentral,1.28,0.25 +Australia Central 2 (Canberra),australiacentral2,1.28,0.25 +Australia Southeast (Melbourne),australiasoutheast,1.28,0.25 +Australia East (Sydney),australiaeast,1.28,0.25 +Austria East (Vienna),austriaeast,1.16,0.03 +Belgium Central,belgiumcentral,1.16,0.03 +Brazil Southeast (Rio),brazilsoutheast,1.16,0.34 +Brazil South (São Paulo),brazilsouth,1.16,0.34 +Canada East (Quebec City),canadaeast,1.16,0.34 +Canada Central (Toronto),canadacentral,1.16,0.34 +Chile Central (Santiago),chilecentral,1.16,0.34 +China North (Beijing),chinanorth,1.28,0.25 +China North 2 (Beijing),chinanorth2,1.28,0.25 +China North 3 (Beijing),chinanorth3,1.28,0.25 +East Asia (Hong Kong),eastasia,1.28,0.25 +China East 3 (Jiangsu),chinaeast3,1.28,0.25 +China East (Shanghai),chinaeast,1.28,0.25 +China East 2 (Shanghai),chinaeast2,1.28,0.25 +Denmark East (Copenhagen),denmarkeast,1.16,0.03 +Finland (Helsinki),finlandcentral,1.16,0.03 +France South (Marseille),francesouth,1.16,0.03 +France Central (Paris),francecentral,1.16,0.03 +Germany North (Berlin),germanynorth,1.16,0.03 +Germany West Central (Frankfurt),germanywestcentral,1.16,0.03 +Greece Central (Athens),greececentral,1.16,0.03 +South India (Chennai),southindia,1.28,0.25 +Southcentral (Hyderabad),indiasouthcentral,1.28,0.25 +West India (Mumbai),westindia,1.28,0.25 +Central India (Pune),centralindia,1.28,0.25 +Indonesia Central (Jakarta),indonesiacentral,1.28,0.25 +North Europe (Dublin),northeurope,1.16,0.03 +Israel Central (Tel Aviv),israelcentral,1.16,0.03 +Italy North (Milan),italynorth,1.16,0.03 +Japan West (Osaka),japanwest,1.28,0.25 +Japan East (Saitama, Tokyo),japaneast,1.28,0.25 +Malaysia West (Kuala Lumpur),malaysiawest,1.28,0.25 +Mexico Central (Querétaro),mexicocentral,1.16,0.34 +West Europe (Amsterdam),westeurope,1.16,0.03 +New Zealand North (Auckland),newzealandnorth,1.28,0.25 +Norway East (Oslo),norwayeast,1.16,0.03 +Norway West (Stavanger),norwaywest,1.16,0.03 +Poland Central (Warsaw),polandcentral,1.16,0.03 +Qatar Central (Doha),qatarcentral,1.16,0.03 +Saudi Arabia East,saudiarabiasouthcentral,1.28,0.25 +Southeast Asia (Singapore),southeastasia,1.28,0.25 +South Africa West (Cape Town),southafricawest,1.16,0.03 +South Africa North (Johannesburg),southafricanorth,1.16,0.03 +Korea South (Busan),koreasouth,1.28,0.25 +Korea Central (Seoul),koreacentral,1.28,0.25 +Spain Central (Madrid),spaincentral,1.16,0.03 +Sweden Central (Gävle),swedencentral,1.16,0.03 +Sweden South (Staffanstorp),swedensouth,1.16,0.03 +Switzerland West (Geneva),switzerlandwest,1.16,0.03 +Switzerland North (Zürich),switzerlandnorth,1.16,0.03 +Taiwan North (Taipei),taiwannorth,1.28,0.25 +UAE Central (Abu Dhabi),uaecentral,1.16,0.03 +UAE North (Dubai),uaenorth,1.16,0.03 +UK West (Cardiff),ukwest,1.16,0.03 +UK South (London),uksouth,1.16,0.03 +East US 3 (Atlanta),eastus3,1.16,0.34 +US Gov Texas,usgovtexas,1.16,0.34 +West Central US (Cheyenne),westcentralus,1.16,0.34 +North Central US (Chicago),northcentralus,1.16,0.34 +Central US (Des Moines),centralus,1.16,0.34 +US DoD Central,usdodwest,1.16,0.34 +West US 2 (Quincy),westus2,1.16,0.34 +US Gov Arizona,usgovarizona,1.16,0.34 +West US 3 (Phoenix),westus3,1.16,0.34 +East US 2 (Richmond),eastus2,1.16,0.34 +US DoD East,usdodeast,1.16,0.34 +East US (Ashburn),eastus,1.16,0.34 +US Sec East,usseceast,1.16,0.34 +South Central US (San Antonio),southcentralus,1.16,0.34 +West US (San Francisco),westus,1.16,0.34 +US Sec West,ussecwest,1.16,0.34 +US Gov Virginia ,usgovvirginia,1.16,0.34 diff --git a/src/main/resources/ccf/azure-storage.json b/src/main/resources/ccf/azure-storage.json new file mode 100644 index 0000000..2f96808 --- /dev/null +++ b/src/main/resources/ccf/azure-storage.json @@ -0,0 +1,64 @@ +{ + "DATA_STORED_USAGE_TYPES": [ + "Data Stored" + ], + "STORAGE_USAGE_UNITS": { + "1 GB/Month": 1, + "10 GB/Month": 10, + "100 GB/Month": 100, + "1 TB/Month": 1024 + }, + "MANAGED_DISK_USAGE_UNITS": { + "1/Month": 1, + "1 /Month": 1 + }, + "REPLICATION_FACTORS": { + "STORAGE_LRS": 3, + "STORAGE_ZRS": 3, + "STORAGE_GRS": 6, + "STORAGE_GZRS": 6, + "STORAGE_DISKS": 3, + "DEFAULT": 1 + }, + "MANAGED_DISKS": { + "P1": {"capacity_gb": 4, "storage_type": "SSD"}, + "P2": {"capacity_gb": 8, "storage_type": "SSD"}, + "P3": {"capacity_gb": 16, "storage_type": "SSD"}, + "P4": {"capacity_gb": 32, "storage_type": "SSD"}, + "P6": {"capacity_gb": 64, "storage_type": "SSD"}, + "P10": {"capacity_gb": 128, "storage_type": "SSD"}, + "P15": {"capacity_gb": 256, "storage_type": "SSD"}, + "P20": {"capacity_gb": 512, "storage_type": "SSD"}, + "P30": {"capacity_gb": 1024, "storage_type": "SSD"}, + "P40": {"capacity_gb": 2048, "storage_type": "SSD"}, + "P50": {"capacity_gb": 4096, "storage_type": "SSD"}, + "P60": {"capacity_gb": 8192, "storage_type": "SSD"}, + "P70": {"capacity_gb": 16384, "storage_type": "SSD"}, + "P80": {"capacity_gb": 32767, "storage_type": "SSD"}, + "E1": {"capacity_gb": 4, "storage_type": "SSD"}, + "E2": {"capacity_gb": 8, "storage_type": "SSD"}, + "E3": {"capacity_gb": 16, "storage_type": "SSD"}, + "E4": {"capacity_gb": 32, "storage_type": "SSD"}, + "E6": {"capacity_gb": 64, "storage_type": "SSD"}, + "E10": {"capacity_gb": 128, "storage_type": "SSD"}, + "E15": {"capacity_gb": 256, "storage_type": "SSD"}, + "E20": {"capacity_gb": 512, "storage_type": "SSD"}, + "E30": {"capacity_gb": 1024, "storage_type": "SSD"}, + "E40": {"capacity_gb": 2048, "storage_type": "SSD"}, + "E50": {"capacity_gb": 4096, "storage_type": "SSD"}, + "E60": {"capacity_gb": 8192, "storage_type": "SSD"}, + "E70": {"capacity_gb": 16384, "storage_type": "SSD"}, + "E80": {"capacity_gb": 32767, "storage_type": "SSD"}, + "S4": {"capacity_gb": 32, "storage_type": "HDD"}, + "S6": {"capacity_gb": 64, "storage_type": "HDD"}, + "S10": {"capacity_gb": 128, "storage_type": "HDD"}, + "S15": {"capacity_gb": 256, "storage_type": "HDD"}, + "S20": {"capacity_gb": 512, "storage_type": "HDD"}, + "S30": {"capacity_gb": 1024, "storage_type": "HDD"}, + "S40": {"capacity_gb": 2048, "storage_type": "HDD"}, + "S50": {"capacity_gb": 4096, "storage_type": "HDD"}, + "S60": {"capacity_gb": 8192, "storage_type": "HDD"}, + "S70": {"capacity_gb": 16384, "storage_type": "HDD"}, + "S80": {"capacity_gb": 32767, "storage_type": "HDD"} + } +} diff --git a/src/main/resources/default-config-azure.json b/src/main/resources/default-config-azure.json index f294f76..42efcb5 100644 --- a/src/main/resources/default-config-azure.json +++ b/src/main/resources/default-config-azure.json @@ -3,6 +3,13 @@ { "className": "com.digitalpebble.spruce.modules.azure.RegionExtraction" }, + { + "className": "com.digitalpebble.spruce.modules.ccf.azure.Storage", + "config": { + "hdd_coefficient_tb_h": 0.65, + "ssd_coefficient_tb_h": 1.2 + } + }, { "className": "com.digitalpebble.spruce.modules.azure.Networking", "config": { @@ -13,8 +20,24 @@ } } }, + { + "className": "com.digitalpebble.spruce.modules.PUE", + "config": { + "default": 1.17 + } + }, + { + "className": "com.digitalpebble.spruce.modules.Water" + }, { "className": "com.digitalpebble.spruce.modules.ember.AverageCarbonIntensity" + }, + { + "className": "com.digitalpebble.spruce.modules.OperationalEmissions", + "config": { + "powerSupplyEfficiency": 1.04, + "powerTransmissionLosses": 1.08 + } } ] } diff --git a/src/test/java/com/digitalpebble/spruce/ConfigTest.java b/src/test/java/com/digitalpebble/spruce/ConfigTest.java index e19bf5d..1f712cd 100644 --- a/src/test/java/com/digitalpebble/spruce/ConfigTest.java +++ b/src/test/java/com/digitalpebble/spruce/ConfigTest.java @@ -206,4 +206,11 @@ void testLoadDefaultConfigForProvider(String providerName) throws Exception { assertNotNull(conf.getModules()); assertFalse(conf.getModules().isEmpty()); } -} \ No newline at end of file + + @Test + void testAzureDefaultConfigIncludesStorage() throws Exception { + Config conf = Config.loadDefault(Provider.AZURE); + assertTrue(conf.getModules().stream() + .anyMatch(module -> module instanceof com.digitalpebble.spruce.modules.ccf.azure.Storage)); + } +} diff --git a/src/test/java/com/digitalpebble/spruce/modules/PUETest.java b/src/test/java/com/digitalpebble/spruce/modules/PUETest.java index 4e84a8f..8df1ba6 100644 --- a/src/test/java/com/digitalpebble/spruce/modules/PUETest.java +++ b/src/test/java/com/digitalpebble/spruce/modules/PUETest.java @@ -3,6 +3,7 @@ package com.digitalpebble.spruce.modules; import com.digitalpebble.spruce.Column; +import com.digitalpebble.spruce.Provider; import com.digitalpebble.spruce.SpruceColumn; import com.digitalpebble.spruce.Utils; import org.apache.spark.sql.Row; @@ -17,8 +18,7 @@ import java.util.Map; import static com.digitalpebble.spruce.SpruceColumn.*; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.*; class PUETest { @@ -76,4 +76,41 @@ void processRegionPUEValues(double energyUsed, String region, double expectedPUE assertEquals(expectedPUE, (Double) enriched.get(SpruceColumn.PUE), 0.001, "Failed for region: " + region); } + + @Test + void testInitWithProvider() { + // Test that the provider-aware init method works + PUE pueWithProvider = new PUE(); + pueWithProvider.init(new HashMap<>(), Provider.AWS); + + // Test with Azure provider + PUE pueAzure = new PUE(); + pueAzure.init(new HashMap<>(), Provider.AZURE); + + // Both should initialize without errors + } + + @Test + void testAzurePUEFileLoadedCorrectly() { + // Test that Azure PUE file loads correctly by verifying a specific region + PUE pueAzure = new PUE(); + pueAzure.init(new HashMap<>(), Provider.AZURE); + + // Create a row with Azure region + Row row = new GenericRowWithSchema(new Object[]{null, null, null}, schema); + Map enriched = new HashMap<>(); + enriched.put(ENERGY_USED, 100d); + enriched.put(REGION, "eastus"); // Azure region + + // Process with PUE module + pueAzure.enrich(row, enriched); + + // Should have a PUE value calculated (this validates resource loading) + assertNotNull(enriched.get(SpruceColumn.PUE)); + + // With the Azure PUE file, eastus should have a specific value (1.12 based on typical Azure PUE data) + Double pueValue = (Double) enriched.get(SpruceColumn.PUE); + assertNotNull(pueValue); + assertTrue(pueValue > 0.0); + } } diff --git a/src/test/java/com/digitalpebble/spruce/modules/WaterTest.java b/src/test/java/com/digitalpebble/spruce/modules/WaterAWSTest.java similarity index 99% rename from src/test/java/com/digitalpebble/spruce/modules/WaterTest.java rename to src/test/java/com/digitalpebble/spruce/modules/WaterAWSTest.java index c2aa524..07469ae 100644 --- a/src/test/java/com/digitalpebble/spruce/modules/WaterTest.java +++ b/src/test/java/com/digitalpebble/spruce/modules/WaterAWSTest.java @@ -4,7 +4,6 @@ import com.digitalpebble.spruce.Column; import com.digitalpebble.spruce.Provider; -import com.digitalpebble.spruce.SpruceColumn; import com.digitalpebble.spruce.Utils; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema; @@ -19,7 +18,7 @@ import static com.digitalpebble.spruce.SpruceColumn.*; import static org.junit.jupiter.api.Assertions.*; -class WaterTest { +class WaterAWSTest { private Water water; private StructType schema; diff --git a/src/test/java/com/digitalpebble/spruce/modules/WaterAzureTest.java b/src/test/java/com/digitalpebble/spruce/modules/WaterAzureTest.java new file mode 100644 index 0000000..af2a9c0 --- /dev/null +++ b/src/test/java/com/digitalpebble/spruce/modules/WaterAzureTest.java @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: Apache-2.0 + +package com.digitalpebble.spruce.modules; + +import com.digitalpebble.spruce.Column; +import com.digitalpebble.spruce.Provider; +import com.digitalpebble.spruce.SpruceColumn; +import com.digitalpebble.spruce.Utils; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.Map; + +import static com.digitalpebble.spruce.SpruceColumn.*; +import static org.junit.jupiter.api.Assertions.*; + +class WaterAzureTest { + + private Water water; + private StructType schema; + + @BeforeEach + void setUp() { + water = new Water(); + water.init(new HashMap<>(), Provider.AZURE); + schema = Utils.getSchema(water); + } + + private Row emptyRow() { + return new GenericRowWithSchema(new Object[schema.length()], schema); + } + + @Test + void testAzureWUEFileLoadedCorrectly() { + // Test that Azure WUE file loads correctly by verifying a specific region + Water waterAzure = new Water(); + waterAzure.init(new HashMap<>(), Provider.AZURE); + + // Create a row with Azure region + Row row = emptyRow(); + Map enriched = new HashMap<>(); + enriched.put(ENERGY_USED, 100d); + enriched.put(PUE, 1.16); // Typical Azure PUE value + enriched.put(REGION, "eastus"); // Azure region + + // Process with Water module + waterAzure.enrich(row, enriched); + + // Should have water cooling calculated (validates resource loading) + assertTrue(enriched.containsKey(WATER_COOLING)); + + // With the Azure PUE file, eastus should have WUE = 0.34 + // 100 * 1.16 * 0.34 = 39.44 + assertEquals(39.44, (Double) enriched.get(WATER_COOLING), 0.01); + } + + @Test + void testAzureRegionWithExactMatch() { + // Test specific Azure regions with exact matches in azure-pue-wue.csv + Row row = emptyRow(); + Map enriched = new HashMap<>(); + enriched.put(ENERGY_USED, 50d); + enriched.put(PUE, 1.16); // Typical Azure PUE value + enriched.put(REGION, "westeurope"); // Azure region with WUE = 0.03 + + water.enrich(row, enriched); + + assertTrue(enriched.containsKey(WATER_COOLING)); + // 50 * 1.16 * 0.03 = 1.74 + assertEquals(1.74, (Double) enriched.get(WATER_COOLING), 0.01); + } + + @Test + void testAzureRegionWithNoWUE() { + // Test a region that doesn't have WUE data in Azure file + Row row = emptyRow(); + Map enriched = new HashMap<>(); + enriched.put(ENERGY_USED, 100d); + enriched.put(PUE, 1.28); // Typical Azure PUE value + enriched.put(REGION, "bogus-region-99"); + + water.enrich(row, enriched); + + // Should not contain water cooling since no WUE data + assertFalse(enriched.containsKey(WATER_COOLING)); + } + + @Test + void testInitWithProviderAzure() { + // Test that the provider-aware init method works with Azure + Water waterAzure = new Water(); + waterAzure.init(new HashMap<>(), Provider.AZURE); + + // Should initialize without errors + assertNotNull(waterAzure); + } + + @Test + void testAzureWUEValues() { + // Test a few Azure regions with specific WUE values to confirm they're loaded properly + Map testCases = new HashMap<>(); + testCases.put("eastus", 0.34); // WUE = 0.34 + testCases.put("westeurope", 0.03); // WUE = 0.03 + testCases.put("australiaeast", 0.25); // WUE = 0.25 + + for (Map.Entry testCase : testCases.entrySet()) { + String region = testCase.getKey(); + double expectedWUE = testCase.getValue(); + + Row row = emptyRow(); + Map enriched = new HashMap<>(); + enriched.put(ENERGY_USED, 100d); + enriched.put(PUE, 1.16); // Consistent PUE value for testing + enriched.put(REGION, region); + + water.enrich(row, enriched); + + // We only test that the WUE file is properly loaded by ensuring + // water cooling is calculated (which requires valid WUE data) + assertTrue(enriched.containsKey(WATER_COOLING), "Should have WATER_COOLING for region: " + region); + + // Calculate expected cooling value: 100 * 1.16 * WUE + double expectedCooling = 100 * 1.16 * expectedWUE; + assertEquals(expectedCooling, (Double) enriched.get(WATER_COOLING), 0.01, + "Failed for region: " + region + " with expected WUE: " + expectedWUE); + } + } +} \ No newline at end of file diff --git a/src/test/java/com/digitalpebble/spruce/modules/ccf/azure/StorageTest.java b/src/test/java/com/digitalpebble/spruce/modules/ccf/azure/StorageTest.java new file mode 100644 index 0000000..9d95cb5 --- /dev/null +++ b/src/test/java/com/digitalpebble/spruce/modules/ccf/azure/StorageTest.java @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: Apache-2.0 + +package com.digitalpebble.spruce.modules.ccf.azure; + +import com.digitalpebble.spruce.Column; +import com.digitalpebble.spruce.Utils; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Stream; + +import static com.digitalpebble.spruce.SpruceColumn.ENERGY_USED; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +class StorageTest { + + private static final double ONE_HOUR_MONTH_RATIO = 1d / (30d * 24d); + private static final double AZURE_PRICING_MONTH_HOURS = 30d * 24d; + + private Storage storage; + private StructType schema; + + @BeforeEach + void initialize() { + storage = new Storage(); + storage.init(Map.of()); + schema = Utils.getSchema(storage); + } + + private static Stream provideStorageRows() { + return Stream.of( + Arguments.of("Storage", "Tables", "LRS Data Stored", "1 GB/Month", + 10d, Utils.Conversions.GBMonthsToGBHours(10d), 3, false), + Arguments.of("Storage", "Tables", "RA-GRS Data Stored", "10 GB/Month", + 2d, Utils.Conversions.GBMonthsToGBHours(20d), 6, false), + Arguments.of("Storage", "Tables", "LRS Data Stored", "1 TB/Month", + 1d, Utils.Conversions.GBMonthsToGBHours(1024d), 3, false), + Arguments.of("Storage", "Tables", "LRS Data Stored", "1 GB/Month", + -5d, Utils.Conversions.GBMonthsToGBHours(-5d), 3, false), + Arguments.of("Storage", "Premium SSD Managed Disks", "P10 LRS Disk", "1/Month", + ONE_HOUR_MONTH_RATIO, 128d, 3, false), + Arguments.of("Storage", "Standard SSD Managed Disks", "E2 LRS Disk", "1/Month", + 2d, 2d * 8d * AZURE_PRICING_MONTH_HOURS, 3, false), + Arguments.of("Storage", "Standard HDD Managed Disks", "S4 LRS Disk", "1/Month", + 2d, 2d * 32d * AZURE_PRICING_MONTH_HOURS, 3, true) + ); + } + + private static Stream provideIgnoredRows() { + return Stream.of( + Arguments.of("Bandwidth", "Inter-Region", "LRS Data Stored", "1 GB/Month", 10d), + Arguments.of("Storage", "Tables", "LRS Data Stored", "10K", 10d), + Arguments.of("Storage", "Tables", "LRS Data Stored", "1M", 10d), + Arguments.of("Storage", "Tables", "LRS Data Stored", "1 GB", 10d), + Arguments.of("Storage", "Tables", "LRS Data Stored", "10K/Month", 10d), + Arguments.of("Storage", "Tables", "Read Operations", "10K", 10d), + Arguments.of("Storage", "Standard SSD Managed Disks", "E4 LRS Disk Operations", "10K", 10d), + Arguments.of("Storage", "Standard HDD Managed Disks", "S4 LRS Disk Operations", "10K", 10d), + Arguments.of("Storage", "Premium SSD Managed Disks", "P99 LRS Disk", "1/Month", 10d), + Arguments.of("Storage", "Premium SSD Managed Disks", "P10 LRS Disk", "100/Month", 10d), + Arguments.of("Storage", "Premium SSD Managed Disks", null, "1/Month", 10d), + Arguments.of("Storage", "Premium SSD Managed Disks", "P10 LRS Disk", "1/Month", null) + ); + } + + @ParameterizedTest + @MethodSource("provideStorageRows") + void processStorageRows(String meterCategory, String meterSubCategory, String meterName, String unit, + double quantity, double gbHours, int replication, boolean isHDD) { + Map enriched = enrich(row(meterCategory, meterSubCategory, meterName, unit, quantity)); + double expected = expected(gbHours, replication, isHDD); + assertEquals(expected, (Double) enriched.get(ENERGY_USED), 0.0001); + } + + @ParameterizedTest + @MethodSource("provideIgnoredRows") + void processIgnoredRows(String meterCategory, String meterSubCategory, String meterName, String unit, + Double quantity) { + Map enriched = enrich(row(meterCategory, meterSubCategory, meterName, unit, quantity)); + assertFalse(enriched.containsKey(ENERGY_USED)); + } + + private Row row(String meterCategory, String meterSubCategory, String meterName, String unit, Double quantity) { + Object[] values = new Object[]{meterCategory, meterSubCategory, meterName, unit, quantity, null}; + return new GenericRowWithSchema(values, schema); + } + + private Map enrich(Row row) { + Map enriched = new HashMap<>(); + storage.enrich(row, enriched); + return enriched; + } + + private double expected(double gbHours, int replication, boolean isHDD) { + double coefficient = isHDD ? storage.hdd_gb_coefficient : storage.ssd_gb_coefficient; + return gbHours / 1000 * coefficient * replication; + } +}