From 41c3f4d51ffdd0a514c3a50248c50bfa7813896b Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 7 Dec 2025 15:07:21 -0800 Subject: [PATCH 1/5] re-add check for unchanging values --- forecasting_tools/data_models/numeric_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py index a7bbdc0..9860b0d 100644 --- a/forecasting_tools/data_models/numeric_report.py +++ b/forecasting_tools/data_models/numeric_report.py @@ -57,7 +57,7 @@ def validate_percentiles(self: NumericDistribution) -> NumericDistribution: for i in range(len(percentiles) - 1): if percentiles[i].percentile >= percentiles[i + 1].percentile: raise ValueError("Percentiles must be in strictly increasing order") - if percentiles[i].value > percentiles[i + 1].value: + if percentiles[i].value >= percentiles[i + 1].value: raise ValueError("Values must be in strictly increasing order") if len(percentiles) < 2: raise ValueError("NumericDistribution must have at least 2 percentiles") From 27eb7e23be3244f2ab5f5460d2c6f58fbe3367b4 Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 7 Dec 2025 15:42:41 -0800 Subject: [PATCH 2/5] rewrite _check_and_update_repeating_values --- .../data_models/numeric_report.py | 88 +++++++++++-------- 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py index 9860b0d..b69427e 100644 --- a/forecasting_tools/data_models/numeric_report.py +++ b/forecasting_tools/data_models/numeric_report.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from collections import Counter +from collections import Counter, defaultdict from typing import TYPE_CHECKING import numpy as np @@ -57,8 +57,8 @@ def validate_percentiles(self: NumericDistribution) -> NumericDistribution: for i in range(len(percentiles) - 1): if percentiles[i].percentile >= percentiles[i + 
1].percentile: raise ValueError("Percentiles must be in strictly increasing order") - if percentiles[i].value >= percentiles[i + 1].value: - raise ValueError("Values must be in strictly increasing order") + if percentiles[i].value > percentiles[i + 1].value: + raise ValueError("Values must be in monotonically increasing order") if len(percentiles) < 2: raise ValueError("NumericDistribution must have at least 2 percentiles") @@ -84,47 +84,57 @@ def validate_percentiles(self: NumericDistribution) -> NumericDistribution: def _check_and_update_repeating_values( self, percentiles: list[Percentile] ) -> list[Percentile]: - unique_value_count = Counter(percentile.value for percentile in percentiles) - final_percentiles = [] + """ + for each location ("value"), get all the percentiles that map to it + if there are multiple at or below lower bound, take the largest "percentile" + if there are multiple at or above upper bound, take the smallest "percentile" + if there are multiple for an in-bound value, only take the largest and + smallest, and place the smallest 1/2 * 1/(cdf_size - 1) below the "value". + """ + + final_percentiles: list[Percentile] = [] + percentile_by_value: dict[float, list[float]] = defaultdict(list) + lower_bounds: list[float] = [] + upper_bounds: list[float] = [] + for percentile in percentiles: - value = percentile.value - count = unique_value_count[value] - repeated_value = count > 1 - value_in_bounds = self.lower_bound < value < self.upper_bound - value_above_bound = value >= self.upper_bound - value_below_bound = value <= self.lower_bound - epsilon = 1e-10 - if not repeated_value: - final_percentiles.append(percentile) - elif value_in_bounds: - greater_epsilon = 1e-6 # TODO: Figure out why normal epsilon doesn't work. Could cause brittle behavior.
- modification = (1 - percentile.percentile) * greater_epsilon - final_percentiles.append( - Percentile( - value=value - modification, - percentile=percentile.percentile, - ) + if percentile.value <= self.lower_bound: + lower_bounds.append(percentile.percentile) + elif percentile.value >= self.upper_bound: + upper_bounds.append(percentile.percentile) + else: + percentile_by_value[percentile.value].append(percentile.percentile) + + if lower_bounds: + final_percentiles.append( + Percentile( + value=self.lower_bound, + percentile=max(lower_bounds), ) - elif value_above_bound: - modification = epsilon * percentile.percentile - final_percentiles.append( - Percentile( - value=self.upper_bound + modification, - percentile=percentile.percentile, - ) + ) + for value, percentiles_at_value in sorted(percentile_by_value.items()): + least_percentile = min(percentiles_at_value) + greatest_percentile = max(percentiles_at_value) + final_percentiles.append( + Percentile( + value=value - 0.5 / ((self.cdf_size or 201) - 1), + percentile=least_percentile, ) - elif value_below_bound: - modification = epsilon * (1 - percentile.percentile) - final_percentiles.append( - Percentile( - value=self.lower_bound - modification, - percentile=percentile.percentile, - ) + ) + final_percentiles.append( + Percentile( + value=value, + percentile=greatest_percentile, ) - else: - raise ValueError( - f"Unexpected state: value {value} is repeated {count} times. 
Bound is {self.lower_bound} and {self.upper_bound}" + ) + if upper_bounds: + final_percentiles.append( + Percentile( + value=self.upper_bound, + percentile=min(upper_bounds), ) + ) + return final_percentiles def _check_too_far_from_bounds(self, percentiles: list[Percentile]) -> None: From 7e7f2926cdec4e9cba3b4d54698304b90add944c Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 7 Dec 2025 16:15:22 -0800 Subject: [PATCH 3/5] improve tests and add tests for multiple duplicate values --- .vscode/settings.json | 3 + .../test_helpers/test_prediction_extractor.py | 108 ++++++++++++++---- .../data_models/numeric_report.py | 8 ++ 3 files changed, 99 insertions(+), 20 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index f79c704..3660323 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -22,4 +22,7 @@ "jupyter.debugJustMyCode": true, "debugpy.debugJustMyCode": true, "cursorpyright.analysis.typeCheckingMode": "basic", + "python-envs.defaultEnvManager": "ms-python.python:poetry", + "python-envs.defaultPackageManager": "ms-python.python:poetry", + "python-envs.pythonProjects": [], } diff --git a/code_tests/unit_tests/test_helpers/test_prediction_extractor.py b/code_tests/unit_tests/test_helpers/test_prediction_extractor.py index 271f5ae..6950e48 100644 --- a/code_tests/unit_tests/test_helpers/test_prediction_extractor.py +++ b/code_tests/unit_tests/test_helpers/test_prediction_extractor.py @@ -452,6 +452,10 @@ def test_multiple_choice_extraction_failure(reasoning: str, options: list[str]) def create_numeric_question( magnitude_units: str | None = None, + upper_bound: float = 1, + lower_bound: float = 0, + open_upper_bound: bool = True, + open_lower_bound: bool = True, ) -> NumericQuestion: if magnitude_units is None: question_text = "How much will the stock market be worth in 2026? 
(exact value)" @@ -462,10 +466,10 @@ def create_numeric_question( return NumericQuestion( question_text=question_text, - upper_bound=1, - lower_bound=0, - open_upper_bound=True, - open_lower_bound=True, + upper_bound=upper_bound, + lower_bound=lower_bound, + open_upper_bound=open_upper_bound, + open_lower_bound=open_lower_bound, ) @@ -483,7 +487,7 @@ def create_numeric_question( Percentile(value=20, percentile=0.4), Percentile(value=30, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=100), ), ( """ @@ -496,7 +500,7 @@ def create_numeric_question( Percentile(value=2.123, percentile=0.4), Percentile(value=3.123, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=10), ), ( """ @@ -509,7 +513,7 @@ def create_numeric_question( Percentile(value=-10.45, percentile=0.4), Percentile(value=30, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=-100, upper_bound=100), ), ( """ @@ -526,7 +530,7 @@ def create_numeric_question( Percentile(value=-8, percentile=0.7), Percentile(value=31, percentile=0.8), ], - create_numeric_question(), + create_numeric_question(lower_bound=-100, upper_bound=100), ), ( """ @@ -539,7 +543,7 @@ def create_numeric_question( Percentile(value=-10, percentile=0.4), Percentile(value=-5.37, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=-50, upper_bound=10), ), ( """ @@ -552,7 +556,7 @@ def create_numeric_question( Percentile(value=2000000, percentile=0.4), Percentile(value=3000000, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=100000000), ), ( """ @@ -565,7 +569,7 @@ def create_numeric_question( Percentile(value=2000000, percentile=0.4), Percentile(value=3000000, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=100000000), ), ( """ @@ -578,7 +582,7 @@ def 
create_numeric_question( Percentile(value=2000000.454, percentile=0.4), Percentile(value=3000000.00, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=100000000), ), ( """ @@ -591,7 +595,9 @@ def create_numeric_question( Percentile(value=2.1, percentile=0.4), Percentile(value=3000, percentile=0.6), ], - create_numeric_question(magnitude_units="millions"), + create_numeric_question( + magnitude_units="millions", lower_bound=0, upper_bound=10000 + ), ), ( """ @@ -607,7 +613,7 @@ def create_numeric_question( Percentile(value=2000000.454, percentile=0.4), Percentile(value=3000000.00, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=100000000), ), ( """ @@ -629,7 +635,9 @@ def create_numeric_question( Percentile(value=4000, percentile=0.8), Percentile(value=5000, percentile=0.9), ], - create_numeric_question(magnitude_units="millions"), + create_numeric_question( + magnitude_units="millions", lower_bound=0, upper_bound=10000 + ), ), ( """ @@ -644,7 +652,7 @@ def create_numeric_question( Percentile(value=2000000, percentile=0.4), Percentile(value=3000000, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=100000000), ), ( """ @@ -664,7 +672,7 @@ def create_numeric_question( Percentile(value=2000000, percentile=0.4), Percentile(value=3000000, percentile=0.6), ], - create_numeric_question(), + create_numeric_question(lower_bound=0, upper_bound=100000000), ), ( # testing with non breaking spaces for commas (gpt o3 uses this) """ @@ -683,7 +691,9 @@ def create_numeric_question( Percentile(value=14500, percentile=0.8), Percentile(value=17000, percentile=0.9), ], - create_numeric_question(), + create_numeric_question( + magnitude_units="millions", lower_bound=0, upper_bound=100000 + ), ), ( # Testing with regular spaces (in case o3 decides this is also a good idea) """ @@ -702,7 +712,9 @@ def create_numeric_question( 
Percentile(value=14500, percentile=0.8), Percentile(value=17000, percentile=0.9), ], - create_numeric_question(), + create_numeric_question( + magnitude_units="millions", lower_bound=0, upper_bound=100000 + ), ), ( # Testing complicated spaces """ @@ -721,7 +733,63 @@ def create_numeric_question( Percentile(value=14500432, percentile=0.8), Percentile(value=17020432.432, percentile=0.9), ], - create_numeric_question(), + create_numeric_question(lower_bound=-100000000, upper_bound=100000000), + ), + ( # out of bounds values are clipped to the bounds + """ + Percentile 20: -0.1 + Percentile 40: 0.5 + Percentile 60: 0.9 + """, + [ + Percentile(value=0, percentile=0.2), + Percentile(value=0.5, percentile=0.4), + Percentile(value=0.9, percentile=0.6), + ], + create_numeric_question(lower_bound=0, upper_bound=1), + ), + ( # out of bounds values are clipped to the bounds + """ + Percentile 20: -0.1 + Percentile 40: 0.5 + Percentile 60: 0.9 + Percentile 80: 1.1 + """, + [ + Percentile(value=0, percentile=0.2), + Percentile(value=0.5, percentile=0.4), + Percentile(value=0.9, percentile=0.6), + Percentile(value=1.0, percentile=0.8), + ], + create_numeric_question(lower_bound=0, upper_bound=1), + ), + ( # percentiles with equal values get placed carefully + """ + Percentile 20: 0.4 + Percentile 40: 0.4 + Percentile 60: 0.4 + Percentile 80: 0.4 + """, + [ + Percentile(value=0.4 - 0.5 / 200, percentile=0.2), + Percentile(value=0.4, percentile=0.8), + ], + create_numeric_question(lower_bound=0, upper_bound=1), + ), + ( # only 1 out of bounds value per side + """ + Percentile 20: -0.5 + Percentile 40: -0.1 + Percentile 50: 0.5 + Percentile 60: 1.1 + Percentile 80: 1.5 + """, + [ + Percentile(value=0, percentile=0.4), + Percentile(value=0.5, percentile=0.5), + Percentile(value=1.0, percentile=0.6), + ], + create_numeric_question(lower_bound=0, upper_bound=1), ), # ( # """ diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py index
b69427e..37b708e 100644 --- a/forecasting_tools/data_models/numeric_report.py +++ b/forecasting_tools/data_models/numeric_report.py @@ -113,6 +113,14 @@ def _check_and_update_repeating_values( ) ) for value, percentiles_at_value in sorted(percentile_by_value.items()): + if len(percentiles_at_value) == 1: + final_percentiles.append( + Percentile( + value=value, + percentile=percentiles_at_value[0], + ) + ) + continue least_percentile = min(percentiles_at_value) greatest_percentile = max(percentiles_at_value) final_percentiles.append( From 742f63347fbdb4666cf6c4284a1da5fddf42b325 Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 7 Dec 2025 16:16:07 -0800 Subject: [PATCH 4/5] remove unused import --- forecasting_tools/data_models/numeric_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forecasting_tools/data_models/numeric_report.py b/forecasting_tools/data_models/numeric_report.py index 37b708e..d563663 100644 --- a/forecasting_tools/data_models/numeric_report.py +++ b/forecasting_tools/data_models/numeric_report.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from collections import Counter, defaultdict +from collections import defaultdict from typing import TYPE_CHECKING import numpy as np From a860696c6ac40c254f7169b2a615ddf0ed8f2403 Mon Sep 17 00:00:00 2001 From: lsabor Date: Sun, 7 Dec 2025 16:17:01 -0800 Subject: [PATCH 5/5] revert settings change --- .vscode/settings.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 3660323..f79c704 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -22,7 +22,4 @@ "jupyter.debugJustMyCode": true, "debugpy.debugJustMyCode": true, "cursorpyright.analysis.typeCheckingMode": "basic", - "python-envs.defaultEnvManager": "ms-python.python:poetry", - "python-envs.defaultPackageManager": "ms-python.python:poetry", - "python-envs.pythonProjects": [], }