From 41c3f4d51ffdd0a514c3a50248c50bfa7813896b Mon Sep 17 00:00:00 2001
From: lsabor <lukesabor@gmail.com>
Date: Sun, 7 Dec 2025 15:07:21 -0800
Subject: [PATCH 1/5] re-add check for unchanging values

---
 forecasting_tools/data_models/numeric_report.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py
index a7bbdc0..9860b0d 100644
--- a/forecasting_tools/data_models/numeric_report.py
+++ b/forecasting_tools/data_models/numeric_report.py
@@ -57,7 +57,7 @@ def validate_percentiles(self: NumericDistribution) -> NumericDistribution:
         for i in range(len(percentiles) - 1):
             if percentiles[i].percentile >= percentiles[i + 1].percentile:
                 raise ValueError("Percentiles must be in strictly increasing order")
-            if percentiles[i].value > percentiles[i + 1].value:
+            if percentiles[i].value >= percentiles[i + 1].value:
                 raise ValueError("Values must be in strictly increasing order")
         if len(percentiles) < 2:
             raise ValueError("NumericDistribution must have at least 2 percentiles")

From 27eb7e23be3244f2ab5f5460d2c6f58fbe3367b4 Mon Sep 17 00:00:00 2001
From: lsabor <lukesabor@gmail.com>
Date: Sun, 7 Dec 2025 15:42:41 -0800
Subject: [PATCH 2/5] rewrite _check_and_update_repeating_values

---
 .../data_models/numeric_report.py             | 88 +++++++++++--------
 1 file changed, 49 insertions(+), 39 deletions(-)

diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py
index 9860b0d..b69427e 100644
--- a/forecasting_tools/data_models/numeric_report.py
+++ b/forecasting_tools/data_models/numeric_report.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from collections import Counter
+from collections import Counter, defaultdict
 from typing import TYPE_CHECKING
 
 import numpy as np
@@ -57,8 +57,8 @@ def validate_percentiles(self: NumericDistribution) -> NumericDistribution:
         for i in range(len(percentiles) - 1):
             if percentiles[i].percentile >= percentiles[i + 1].percentile:
                 raise ValueError("Percentiles must be in strictly increasing order")
-            if percentiles[i].value >= percentiles[i + 1].value:
-                raise ValueError("Values must be in strictly increasing order")
+            if percentiles[i].value > percentiles[i + 1].value:
+                raise ValueError("Values must be in monotonically increasing order")
         if len(percentiles) < 2:
             raise ValueError("NumericDistribution must have at least 2 percentiles")
 
@@ -84,47 +84,57 @@ def validate_percentiles(self: NumericDistribution) -> NumericDistribution:
     def _check_and_update_repeating_values(
         self, percentiles: list[Percentile]
     ) -> list[Percentile]:
-        unique_value_count = Counter(percentile.value for percentile in percentiles)
-        final_percentiles = []
+        """
+        for each location ("value"), get all the percentiles that map to it
+        if there are multiple at or below lower bound, take the largest "percentile"
+        if there are multiple at or above upper bound, take the smallest "percentile"
+        if there are multiple for an in-bound value, only take the largest and
+            smallest, and place the smallest 0.5 / (cdf_size - 1) below the "value".
+        """
+
+        final_percentiles: list[Percentile] = []
+        percentile_by_value: dict[float, list[float]] = defaultdict(list)
+        lower_bounds: list[float] = []
+        upper_bounds: list[float] = []
+
         for percentile in percentiles:
-            value = percentile.value
-            count = unique_value_count[value]
-            repeated_value = count > 1
-            value_in_bounds = self.lower_bound < value < self.upper_bound
-            value_above_bound = value >= self.upper_bound
-            value_below_bound = value <= self.lower_bound
-            epsilon = 1e-10
-            if not repeated_value:
-                final_percentiles.append(percentile)
-            elif value_in_bounds:
-                greater_epsilon = 1e-6  # TODO: Figure out why normal epsilon doesn't work. Could cause brittle behavior.
-                modification = (1 - percentile.percentile) * greater_epsilon
-                final_percentiles.append(
-                    Percentile(
-                        value=value - modification,
-                        percentile=percentile.percentile,
-                    )
+            if percentile.value <= self.lower_bound:
+                lower_bounds.append(percentile.percentile)
+            elif percentile.value >= self.upper_bound:
+                upper_bounds.append(percentile.percentile)
+            else:
+                percentile_by_value[percentile.value].append(percentile.percentile)
+
+        if lower_bounds:
+            final_percentiles.append(
+                Percentile(
+                    value=self.lower_bound,
+                    percentile=max(lower_bounds),
                 )
-            elif value_above_bound:
-                modification = epsilon * percentile.percentile
-                final_percentiles.append(
-                    Percentile(
-                        value=self.upper_bound + modification,
-                        percentile=percentile.percentile,
-                    )
+            )
+        for value, percentiles_at_value in sorted(percentile_by_value.items()):
+            least_percentile = min(percentiles_at_value)
+            greatest_percentile = max(percentiles_at_value)
+            final_percentiles.append(
+                Percentile(
+                    value=value - 0.5 / ((self.cdf_size or 201) - 1),
+                    percentile=least_percentile,
                 )
-            elif value_below_bound:
-                modification = epsilon * (1 - percentile.percentile)
-                final_percentiles.append(
-                    Percentile(
-                        value=self.lower_bound - modification,
-                        percentile=percentile.percentile,
-                    )
+            )
+            final_percentiles.append(
+                Percentile(
+                    value=value,
+                    percentile=greatest_percentile,
                 )
-            else:
-                raise ValueError(
-                    f"Unexpected state: value {value} is repeated {count} times. Bound is {self.lower_bound} and {self.upper_bound}"
+            )
+        if upper_bounds:
+            final_percentiles.append(
+                Percentile(
+                    value=self.upper_bound,
+                    percentile=min(upper_bounds),
                 )
+            )
+
         return final_percentiles
 
     def _check_too_far_from_bounds(self, percentiles: list[Percentile]) -> None:

From 7e7f2926cdec4e9cba3b4d54698304b90add944c Mon Sep 17 00:00:00 2001
From: lsabor <lukesabor@gmail.com>
Date: Sun, 7 Dec 2025 16:15:22 -0800
Subject: [PATCH 3/5] improve tests and add tests for multiple duplicate values

---
 .vscode/settings.json                         |   3 +
 .../test_helpers/test_prediction_extractor.py | 108 ++++++++++++++----
 .../data_models/numeric_report.py             |   8 ++
 3 files changed, 99 insertions(+), 20 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index f79c704..3660323 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -22,4 +22,7 @@
   "jupyter.debugJustMyCode": true,
   "debugpy.debugJustMyCode": true,
   "cursorpyright.analysis.typeCheckingMode": "basic",
+  "python-envs.defaultEnvManager": "ms-python.python:poetry",
+  "python-envs.defaultPackageManager": "ms-python.python:poetry",
+  "python-envs.pythonProjects": [],
 }
diff --git a/code_tests/unit_tests/test_helpers/test_prediction_extractor.py b/code_tests/unit_tests/test_helpers/test_prediction_extractor.py
index 271f5ae..6950e48 100644
--- a/code_tests/unit_tests/test_helpers/test_prediction_extractor.py
+++ b/code_tests/unit_tests/test_helpers/test_prediction_extractor.py
@@ -452,6 +452,10 @@ def test_multiple_choice_extraction_failure(reasoning: str, options: list[str])
 
 def create_numeric_question(
     magnitude_units: str | None = None,
+    upper_bound: float = 1,
+    lower_bound: float = 0,
+    open_upper_bound: bool = True,
+    open_lower_bound: bool = True,
 ) -> NumericQuestion:
     if magnitude_units is None:
         question_text = "How much will the stock market be worth in 2026? (exact value)"
@@ -462,10 +466,10 @@ def create_numeric_question(
 
     return NumericQuestion(
         question_text=question_text,
-        upper_bound=1,
-        lower_bound=0,
-        open_upper_bound=True,
-        open_lower_bound=True,
+        upper_bound=upper_bound,
+        lower_bound=lower_bound,
+        open_upper_bound=open_upper_bound,
+        open_lower_bound=open_lower_bound,
     )
 
 
@@ -483,7 +487,7 @@ def create_numeric_question(
                 Percentile(value=20, percentile=0.4),
                 Percentile(value=30, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=100),
         ),
         (
             """
@@ -496,7 +500,7 @@ def create_numeric_question(
                 Percentile(value=2.123, percentile=0.4),
                 Percentile(value=3.123, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=10),
         ),
         (
             """
@@ -509,7 +513,7 @@ def create_numeric_question(
                 Percentile(value=-10.45, percentile=0.4),
                 Percentile(value=30, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=-100, upper_bound=100),
         ),
         (
             """
@@ -526,7 +530,7 @@ def create_numeric_question(
                 Percentile(value=-8, percentile=0.7),
                 Percentile(value=31, percentile=0.8),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=-100, upper_bound=100),
         ),
         (
             """
@@ -539,7 +543,7 @@ def create_numeric_question(
                 Percentile(value=-10, percentile=0.4),
                 Percentile(value=-5.37, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=-50, upper_bound=10),
         ),
         (
             """
@@ -552,7 +556,7 @@ def create_numeric_question(
                 Percentile(value=2000000, percentile=0.4),
                 Percentile(value=3000000, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=100000000),
         ),
         (
             """
@@ -565,7 +569,7 @@ def create_numeric_question(
                 Percentile(value=2000000, percentile=0.4),
                 Percentile(value=3000000, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=100000000),
         ),
         (
             """
@@ -578,7 +582,7 @@ def create_numeric_question(
                 Percentile(value=2000000.454, percentile=0.4),
                 Percentile(value=3000000.00, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=100000000),
         ),
         (
             """
@@ -591,7 +595,9 @@ def create_numeric_question(
                 Percentile(value=2.1, percentile=0.4),
                 Percentile(value=3000, percentile=0.6),
             ],
-            create_numeric_question(magnitude_units="millions"),
+            create_numeric_question(
+                magnitude_units="millions", lower_bound=0, upper_bound=10000
+            ),
         ),
         (
             """
@@ -607,7 +613,7 @@ def create_numeric_question(
                 Percentile(value=2000000.454, percentile=0.4),
                 Percentile(value=3000000.00, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=100000000),
         ),
         (
             """
@@ -629,7 +635,9 @@ def create_numeric_question(
                 Percentile(value=4000, percentile=0.8),
                 Percentile(value=5000, percentile=0.9),
             ],
-            create_numeric_question(magnitude_units="millions"),
+            create_numeric_question(
+                magnitude_units="millions", lower_bound=0, upper_bound=10000
+            ),
         ),
         (
             """
@@ -644,7 +652,7 @@ def create_numeric_question(
                 Percentile(value=2000000, percentile=0.4),
                 Percentile(value=3000000, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=100000000),
         ),
         (
             """
@@ -664,7 +672,7 @@ def create_numeric_question(
                 Percentile(value=2000000, percentile=0.4),
                 Percentile(value=3000000, percentile=0.6),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=0, upper_bound=100000000),
         ),
         (  # testing with non breaking spaces for commas (gpt o3 uses this)
             """
@@ -683,7 +691,9 @@ def create_numeric_question(
                 Percentile(value=14500, percentile=0.8),
                 Percentile(value=17000, percentile=0.9),
             ],
-            create_numeric_question(),
+            create_numeric_question(
+                magnitude_units="millions", lower_bound=0, upper_bound=100000
+            ),
         ),
         (  # Testing with regular spaces (in case o3 decides this is also a good idea)
             """
@@ -702,7 +712,9 @@ def create_numeric_question(
                 Percentile(value=14500, percentile=0.8),
                 Percentile(value=17000, percentile=0.9),
             ],
-            create_numeric_question(),
+            create_numeric_question(
+                magnitude_units="millions", lower_bound=0, upper_bound=100000
+            ),
         ),
         (  # Testing complicated spaces
             """
@@ -721,7 +733,63 @@ def create_numeric_question(
                 Percentile(value=14500432, percentile=0.8),
                 Percentile(value=17020432.432, percentile=0.9),
             ],
-            create_numeric_question(),
+            create_numeric_question(lower_bound=-100000000, upper_bound=100000000),
+        ),
+        (  # out of bounds values are clipped to the bounds
+            """
+            Percentile 20: -0.1
+            Percentile 40: 0.5
+            Percentile 60: 0.9
+            """,
+            [
+                Percentile(value=0, percentile=0.2),
+                Percentile(value=0.5, percentile=0.4),
+                Percentile(value=0.9, percentile=0.6),
+            ],
+            create_numeric_question(lower_bound=0, upper_bound=1),
+        ),
+        (  # out of bounds values are clipped to the bounds
+            """
+            Percentile 20: -0.1
+            Percentile 40: 0.5
+            Percentile 60: 0.9
+            Percentile 80: 1.1
+            """,
+            [
+                Percentile(value=0, percentile=0.2),
+                Percentile(value=0.5, percentile=0.4),
+                Percentile(value=0.9, percentile=0.6),
+                Percentile(value=1.0, percentile=0.8),
+            ],
+            create_numeric_question(lower_bound=0, upper_bound=1),
+        ),
+        (  # equivalent percentiles get placed carefully
+            """
+            Percentile 20: 0.4
+            Percentile 40: 0.4
+            Percentile 60: 0.4
+            Percentile 80: 0.4
+            """,
+            [
+                Percentile(value=0.4 - 0.5 / 200, percentile=0.2),
+                Percentile(value=0.4, percentile=0.8),
+            ],
+            create_numeric_question(lower_bound=0, upper_bound=1),
+        ),
+        (  # only 1 out of bounds value per side
+            """
+            Percentile 20: -0.5
+            Percentile 40: -0.1
+            Percentile 50: 0.5
+            Percentile 60: 1.1
+            Percentile 80: 1.5
+            """,
+            [
+                Percentile(value=0, percentile=0.4),
+                Percentile(value=0.5, percentile=0.5),
+                Percentile(value=1.0, percentile=0.6),
+            ],
+            create_numeric_question(lower_bound=0, upper_bound=1),
         ),
         # (
         #     """
diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py
index b69427e..37b708e 100644
--- a/forecasting_tools/data_models/numeric_report.py
+++ b/forecasting_tools/data_models/numeric_report.py
@@ -113,6 +113,14 @@ def _check_and_update_repeating_values(
                 )
             )
         for value, percentiles_at_value in sorted(percentile_by_value.items()):
+            if len(percentiles_at_value) == 1:
+                final_percentiles.append(
+                    Percentile(
+                        value=value,
+                        percentile=percentiles_at_value[0],
+                    )
+                )
+                continue
             least_percentile = min(percentiles_at_value)
             greatest_percentile = max(percentiles_at_value)
             final_percentiles.append(

From 742f63347fbdb4666cf6c4284a1da5fddf42b325 Mon Sep 17 00:00:00 2001
From: lsabor <lukesabor@gmail.com>
Date: Sun, 7 Dec 2025 16:16:07 -0800
Subject: [PATCH 4/5] remove unused import

---
 forecasting_tools/data_models/numeric_report.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py
index 37b708e..d563663 100644
--- a/forecasting_tools/data_models/numeric_report.py
+++ b/forecasting_tools/data_models/numeric_report.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from collections import Counter, defaultdict
+from collections import defaultdict
 from typing import TYPE_CHECKING
 
 import numpy as np

From a860696c6ac40c254f7169b2a615ddf0ed8f2403 Mon Sep 17 00:00:00 2001
From: lsabor <lukesabor@gmail.com>
Date: Sun, 7 Dec 2025 16:17:01 -0800
Subject: [PATCH 5/5] revert settings change

---
 .vscode/settings.json | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 3660323..f79c704 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -22,7 +22,4 @@
   "jupyter.debugJustMyCode": true,
   "debugpy.debugJustMyCode": true,
   "cursorpyright.analysis.typeCheckingMode": "basic",
-  "python-envs.defaultEnvManager": "ms-python.python:poetry",
-  "python-envs.defaultPackageManager": "ms-python.python:poetry",
-  "python-envs.pythonProjects": [],
 }