From 418ed3c05b873b1ea0d75a4da163e7d6daf178a7 Mon Sep 17 00:00:00 2001
From: policyengine-bot <bot@policyengine.org>
Date: Tue, 16 Dec 2025 14:58:33 +0000
Subject: [PATCH 1/3] Optimize uprate_parameters by batching parameter lookups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change reduces the number of repeated function calls in the
uprate_parameters function by:

1. Batching uprating parameter value lookups in the standard uprating path
   - Pre-compute all instants that need uprating
   - Batch lookup all uprating parameter values into a cache
   - Use cached values in the loop instead of repeated function calls

2. Batching get_at_instant calls in the cadence uprating path
   - Pre-compute all calculation dates
   - Batch lookup all uprating parameter values into a cache
   - Use cached values instead of repeated function calls

These optimizations should significantly reduce the overhead from the
~1M parameter lookups identified in the profiling analysis, addressing
the 46% of import time spent in uprate_parameters.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../operations/uprate_parameters.py           | 86 +++++++++++--------
 1 file changed, 52 insertions(+), 34 deletions(-)

diff --git a/policyengine_core/parameters/operations/uprate_parameters.py b/policyengine_core/parameters/operations/uprate_parameters.py
index b09b2f5c..3acb2491 100644
--- a/policyengine_core/parameters/operations/uprate_parameters.py
+++ b/policyengine_core/parameters/operations/uprate_parameters.py
@@ -137,30 +137,36 @@ def uprate_parameters(root: ParameterNode) -> ParameterNode:
                     # Pre-compute uprater values for all entries to avoid repeated lookups
                     has_rounding = "rounding" in meta
 
-                    # For each defined instant in the uprating parameter
+                    # Pre-compute all uprating values and instants to reduce function calls
+                    uprating_entries = []
                     for entry in uprating_parameter.values_list[::-1]:
                         entry_instant = instant(entry.instant_str)
-                        # If the uprater instant is defined after the last parameter instant
                         if entry_instant > last_instant:
-                            # Apply the uprater and add to the parameter
-                            uprater_at_entry = uprating_parameter(
-                                entry_instant
-                            )
-                            uprater_change = (
-                                uprater_at_entry / uprater_at_start
+                            uprating_entries.append((entry_instant, entry.instant_str))
+
+                    # Batch lookup of uprating parameter values
+                    uprater_values = {
+                        entry_instant: uprating_parameter(entry_instant)
+                        for entry_instant, _ in uprating_entries
+                    }
+
+                    # For each defined instant in the uprating parameter
+                    for entry_instant, entry_instant_str in uprating_entries:
+                        # Apply the uprater and add to the parameter
+                        uprater_at_entry = uprater_values[entry_instant]
+                        uprater_change = uprater_at_entry / uprater_at_start
+                        uprated_value = value_at_start * uprater_change
+                        if has_rounding:
+                            uprated_value = round_uprated_value(
+                                meta, uprated_value
                             )
-                            uprated_value = value_at_start * uprater_change
-                            if has_rounding:
-                                uprated_value = round_uprated_value(
-                                    meta, uprated_value
-                                )
-                            parameter.values_list.append(
-                                ParameterAtInstant(
-                                    parameter.name,
-                                    entry.instant_str,
-                                    data=uprated_value,
-                                )
+                        parameter.values_list.append(
+                            ParameterAtInstant(
+                                parameter.name,
+                                entry_instant_str,
+                                data=uprated_value,
                             )
+                        )
                 # Whether using cadence or not, sort the parameter values_list
                 parameter.values_list.sort(
                     key=lambda x: x.instant_str, reverse=True
@@ -374,21 +380,33 @@ def uprate_by_cadence(
     # Set a starting reference value to calculate against
     reference_value = parameter.get_at_instant(instant(first_date.date()))
 
-    # For each entry (corresponding to an enactment date) in the iteration list...
-    for enactment_date in iterations:
-        # Calculate the start and end calculation dates
-        start_calc_date: datetime = enactment_date - enactment_start_offset
-        end_calc_date: datetime = enactment_date - enactment_end_offset
-
-        # Find uprater value at cadence start
-        start_val = uprating_parameter.get_at_instant(
-            instant(start_calc_date.date())
-        )
+    # Pre-compute all instants and batch lookup uprating parameter values
+    iteration_list = list(iterations)
+    calc_dates = []
+    for enactment_date in iteration_list:
+        start_calc_date = enactment_date - enactment_start_offset
+        end_calc_date = enactment_date - enactment_end_offset
+        calc_dates.append((enactment_date, start_calc_date, end_calc_date))
+
+    # Batch lookup all uprating parameter values to reduce repeated function calls
+    uprater_cache = {}
+    for _, start_calc_date, end_calc_date in calc_dates:
+        start_instant = instant(start_calc_date.date())
+        end_instant = instant(end_calc_date.date())
+        if start_instant not in uprater_cache:
+            uprater_cache[start_instant] = uprating_parameter.get_at_instant(start_instant)
+        if end_instant not in uprater_cache:
+            uprater_cache[end_instant] = uprating_parameter.get_at_instant(end_instant)
+
+    has_rounding = "rounding" in meta
 
-        # Find uprater value at cadence end
-        end_val = uprating_parameter.get_at_instant(
-            instant(end_calc_date.date())
-        )
+    # For each entry (corresponding to an enactment date) in the iteration list...
+    for enactment_date, start_calc_date, end_calc_date in calc_dates:
+        # Get pre-computed uprater values
+        start_instant = instant(start_calc_date.date())
+        end_instant = instant(end_calc_date.date())
+        start_val = uprater_cache[start_instant]
+        end_val = uprater_cache[end_instant]
 
         # Ensure that earliest date exists within uprater
         if not start_val:
@@ -401,7 +419,7 @@ def uprate_by_cadence(
 
         # Uprate value
         uprated_value = difference * reference_value
-        if "rounding" in meta:
+        if has_rounding:
             uprated_value = round_uprated_value(meta, uprated_value)
 
         # Add uprated value to data list

From 6f704f91fcb490c686130f89cfcfe891cdc6df08 Mon Sep 17 00:00:00 2001
From: policyengine-bot <bot@policyengine.org>
Date: Tue, 16 Dec 2025 16:10:01 +0000
Subject: [PATCH 2/3] Fix uprate_parameters optimization with global cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous optimization moved parameter lookups but didn't reduce them.
With ~914K calls still happening, the issue was that the dict comprehension
ran inside the outer loop, once per parameter being uprated.

This fix adds a global cache keyed by (parameter_name, instant) that persists
across all parameters being uprated. This eliminates redundant lookups when
multiple parameters share the same uprating parameter (e.g., CPI).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../operations/uprate_parameters.py           | 46 +++++++++++++------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/policyengine_core/parameters/operations/uprate_parameters.py b/policyengine_core/parameters/operations/uprate_parameters.py
index 3acb2491..e8d6f40f 100644
--- a/policyengine_core/parameters/operations/uprate_parameters.py
+++ b/policyengine_core/parameters/operations/uprate_parameters.py
@@ -36,6 +36,10 @@ def uprate_parameters(root: ParameterNode) -> ParameterNode:
                 if hasattr(bracket, allowed_key):
                     descendants.append(getattr(bracket, allowed_key))
 
+    # Global cache for uprating parameter values to avoid redundant lookups
+    # Key: (uprating_parameter.name, instant), Value: parameter value at that instant
+    uprating_cache = {}
+
     for parameter in descendants:
         if isinstance(parameter, Parameter):
             if parameter.metadata.get("uprating") is not None:
@@ -110,6 +114,7 @@ def uprate_parameters(root: ParameterNode) -> ParameterNode:
                         uprating_first_date,
                         uprating_last_date,
                         meta,
+                        uprating_cache,
                     )
 
                     # Append uprated data to parameter values list
@@ -127,7 +132,12 @@ def uprate_parameters(root: ParameterNode) -> ParameterNode:
                     # Pre-compute values that don't change in the loop
                     last_instant_str = str(last_instant)
                     value_at_start = parameter(last_instant)
-                    uprater_at_start = uprating_parameter(last_instant)
+
+                    # Use cache for uprating parameter lookup
+                    cache_key = (uprating_parameter.name, last_instant)
+                    if cache_key not in uprating_cache:
+                        uprating_cache[cache_key] = uprating_parameter(last_instant)
+                    uprater_at_start = uprating_cache[cache_key]
 
                     if uprater_at_start is None:
                         raise ValueError(
@@ -144,11 +154,13 @@ def uprate_parameters(root: ParameterNode) -> ParameterNode:
                         if entry_instant > last_instant:
                             uprating_entries.append((entry_instant, entry.instant_str))
 
-                    # Batch lookup of uprating parameter values
-                    uprater_values = {
-                        entry_instant: uprating_parameter(entry_instant)
-                        for entry_instant, _ in uprating_entries
-                    }
+                    # Batch lookup of uprating parameter values using global cache
+                    uprater_values = {}
+                    for entry_instant, _ in uprating_entries:
+                        cache_key = (uprating_parameter.name, entry_instant)
+                        if cache_key not in uprating_cache:
+                            uprating_cache[cache_key] = uprating_parameter(entry_instant)
+                        uprater_values[entry_instant] = uprating_cache[cache_key]
 
                     # For each defined instant in the uprating parameter
                     for entry_instant, entry_instant_str in uprating_entries:
@@ -351,6 +363,7 @@ def uprate_by_cadence(
     first_date: datetime,
     last_date: datetime,
     meta: dict,
+    uprating_cache: dict,
 ) -> list[ParameterAtInstant]:
     # Determine the frequency module to utilize within rrule
     interval = ""
@@ -388,25 +401,28 @@ def uprate_by_cadence(
         end_calc_date = enactment_date - enactment_end_offset
         calc_dates.append((enactment_date, start_calc_date, end_calc_date))
 
-    # Batch lookup all uprating parameter values to reduce repeated function calls
-    uprater_cache = {}
+    # Batch lookup all uprating parameter values using global cache
     for _, start_calc_date, end_calc_date in calc_dates:
         start_instant = instant(start_calc_date.date())
         end_instant = instant(end_calc_date.date())
-        if start_instant not in uprater_cache:
-            uprater_cache[start_instant] = uprating_parameter.get_at_instant(start_instant)
-        if end_instant not in uprater_cache:
-            uprater_cache[end_instant] = uprating_parameter.get_at_instant(end_instant)
+        start_key = (uprating_parameter.name, start_instant)
+        end_key = (uprating_parameter.name, end_instant)
+        if start_key not in uprating_cache:
+            uprating_cache[start_key] = uprating_parameter.get_at_instant(start_instant)
+        if end_key not in uprating_cache:
+            uprating_cache[end_key] = uprating_parameter.get_at_instant(end_instant)
 
     has_rounding = "rounding" in meta
 
     # For each entry (corresponding to an enactment date) in the iteration list...
     for enactment_date, start_calc_date, end_calc_date in calc_dates:
-        # Get pre-computed uprater values
+        # Get pre-computed uprater values from global cache
         start_instant = instant(start_calc_date.date())
         end_instant = instant(end_calc_date.date())
-        start_val = uprater_cache[start_instant]
-        end_val = uprater_cache[end_instant]
+        start_key = (uprating_parameter.name, start_instant)
+        end_key = (uprating_parameter.name, end_instant)
+        start_val = uprating_cache[start_key]
+        end_val = uprating_cache[end_key]
 
         # Ensure that earliest date exists within uprater
         if not start_val:

From 2527f74e3bd14c2677c7a24dec569e15f26c46d1 Mon Sep 17 00:00:00 2001
From: policyengine-bot <bot@policyengine.org>
Date: Tue, 16 Dec 2025 17:03:57 +0000
Subject: [PATCH 3/3] Optimize parameter loading performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses additional bottlenecks beyond uprate_parameters:

1. **Fix O(n²) complexity in propagate_parameter_metadata**
   - Pre-filter parameters that need metadata propagation
   - Avoid redundant get_descendants() calls
   - Pre-compute metadata dict before inner loop

2. **Optimize instant() function (3M calls)**
   - Reorder isinstance checks by frequency
   - Inline cache lookups for common types
   - Reduce redundant cache_key variable assignments
   - Replace assertions with proper exceptions

These optimizations target the top time-consuming functions identified
via profiling during policyengine_us import.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../propagate_parameter_metadata.py           | 30 +++++++++----
 policyengine_core/periods/helpers.py          | 43 +++++++++++++------
 2 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/policyengine_core/parameters/operations/propagate_parameter_metadata.py b/policyengine_core/parameters/operations/propagate_parameter_metadata.py
index de2925ad..5bdfc956 100644
--- a/policyengine_core/parameters/operations/propagate_parameter_metadata.py
+++ b/policyengine_core/parameters/operations/propagate_parameter_metadata.py
@@ -16,15 +16,27 @@ def propagate_parameter_metadata(root: ParameterNode) -> ParameterNode:
 
     UNPROPAGAGED_METADATA = ["breakdown", "label", "name", "description"]
 
-    for parameter in root.get_descendants():
-        if parameter.metadata.get("propagate_metadata_to_children"):
+    # Pre-compute all descendants once to avoid O(n²) complexity
+    all_descendants = list(root.get_descendants())
+
+    # Find parameters that need to propagate metadata
+    propagators = [
+        p for p in all_descendants
+        if p.metadata.get("propagate_metadata_to_children")
+    ]
+
+    # For each parameter that propagates metadata, update its descendants
+    for parameter in propagators:
+        # Get metadata to propagate
+        metadata_to_propagate = {
+            key: value
+            for key, value in parameter.metadata.items()
+            if key not in UNPROPAGAGED_METADATA
+        }
+
+        # Only call get_descendants() if there's metadata to propagate
+        if metadata_to_propagate:
             for descendant in parameter.get_descendants():
-                descendant.metadata.update(
-                    {
-                        key: value
-                        for key, value in parameter.metadata.items()
-                        if key not in UNPROPAGAGED_METADATA
-                    }
-                )
+                descendant.metadata.update(metadata_to_propagate)
 
     return root
diff --git a/policyengine_core/periods/helpers.py b/policyengine_core/periods/helpers.py
index f8ee95b8..7fc4c634 100644
--- a/policyengine_core/periods/helpers.py
+++ b/policyengine_core/periods/helpers.py
@@ -47,29 +47,50 @@ def instant(instant):
     """
     if instant is None:
         return None
+
+    # Fast path for already-instant objects (most common after string)
     if isinstance(instant, periods.Instant):
         return instant
+
+    # String is the most common input type during parameter loading
     if isinstance(instant, str):
         return _instant_from_string(instant)
 
-    # For other types, create a cache key and check the cache
-    cache_key = None
+    # Fast path for datetime.date objects (common in uprating)
+    if isinstance(instant, datetime.date):
+        cache_key = (instant.year, instant.month, instant.day)
+        cached = _instant_cache.get(cache_key)
+        if cached is not None:
+            return cached
+        result = periods.Instant(cache_key)
+        _instant_cache[cache_key] = result
+        return result
+
     # Check Period before tuple since Period is a subclass of tuple
     if isinstance(instant, periods.Period):
         return instant.start
-    elif isinstance(instant, datetime.date):
-        cache_key = (instant.year, instant.month, instant.day)
-    elif isinstance(instant, int):
+
+    # Handle int input
+    if isinstance(instant, int):
         cache_key = (instant, 1, 1)
-    elif isinstance(instant, (tuple, list)):
+        cached = _instant_cache.get(cache_key)
+        if cached is not None:
+            return cached
+        result = periods.Instant(cache_key)
+        _instant_cache[cache_key] = result
+        return result
+
+    # Handle tuple/list input
+    if isinstance(instant, (tuple, list)):
         if len(instant) == 1:
             cache_key = (instant[0], 1, 1)
         elif len(instant) == 2:
             cache_key = (instant[0], instant[1], 1)
         elif len(instant) == 3:
             cache_key = tuple(instant)
+        else:
+            raise AssertionError(f"Invalid instant length: {len(instant)}")
 
-    if cache_key is not None:
         cached = _instant_cache.get(cache_key)
         if cached is not None:
             return cached
@@ -78,13 +99,7 @@ def instant(instant):
         return result
 
     # Fallback for unexpected types
-    assert isinstance(instant, tuple), instant
-    assert 1 <= len(instant) <= 3
-    if len(instant) == 1:
-        return periods.Instant((instant[0], 1, 1))
-    if len(instant) == 2:
-        return periods.Instant((instant[0], instant[1], 1))
-    return periods.Instant(instant)
+    raise TypeError(f"Unexpected instant type: {type(instant)}")
 
 
 def instant_date(instant):