-
-
Notifications
You must be signed in to change notification settings - Fork 50.4k
Expand file tree
/
Copy pathcoordinate_compression.py
More file actions
143 lines (115 loc) · 3.86 KB
/
coordinate_compression.py
File metadata and controls
143 lines (115 loc) · 3.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
Coordinate Compression Utility
------------------------------
Fix for Issue #13226: Handles missing or invalid values (None, NaN)
to ensure consistent compression behavior.
This module provides a `CoordinateCompressor` class that safely compresses
and decompresses values from a list by mapping each unique valid value
to a unique integer index.
Invalid or non-comparable values (like None or NaN) are ignored during
compression mapping and return -1 when compressed.
"""
from __future__ import annotations
import math
from typing import Any
class CoordinateCompressor:
    """
    Map each distinct valid value of a list to its rank in sorted order.

    ``None`` and float NaN are treated as invalid: they are left out of the
    mapping and always compress to ``-1``.

    Example:
    >>> arr = [100, 10, 52, 83]
    >>> cc = CoordinateCompressor(arr)
    >>> cc.compress(100)
    3
    >>> cc.compress(52)
    1
    >>> cc.decompress(1)
    52
    >>> cc.compress(None)
    -1
    """

    @staticmethod
    def _is_invalid(value: Any) -> bool:
        """
        Return True when ``value`` is None or a float NaN.

        >>> CoordinateCompressor._is_invalid(None)
        True
        >>> CoordinateCompressor._is_invalid(float("nan"))
        True
        >>> CoordinateCompressor._is_invalid(0)
        False
        """
        return value is None or (isinstance(value, float) and math.isnan(value))

    def __init__(self, arr: list[Any]) -> None:
        """
        Build the value -> index and index -> value mappings.

        Args:
            arr: The values to be compressed. None and NaN entries are
                skipped when building the mapping.

        Raises:
            TypeError: If a valid value is unhashable, or if the valid values
                cannot be ordered against each other.

        >>> arr = [100, None, 52, 83, float("nan")]
        >>> cc = CoordinateCompressor(arr)
        >>> cc.compress(100)
        2
        >>> cc.compress(None)
        -1
        >>> cc.compress(float("nan"))
        -1
        >>> CoordinateCompressor(iter([3, 1, 2])).compress(1)
        0
        """
        # Copy of the input exactly as given (invalid entries included).
        self.original = list(arr)

        # Partition the stored copy in a single pass. Reading from the copy
        # rather than from ``arr`` keeps one-shot iterables working.
        valid_values: list[Any] = []
        invalid_values: list[Any] = []
        for value in self.original:
            if self._is_invalid(value):
                invalid_values.append(value)
            else:
                valid_values.append(value)

        # dict.fromkeys drops duplicates; sorted() then fixes the rank order.
        unique_sorted = sorted(dict.fromkeys(valid_values))

        # value -> rank
        self.coordinate_map: dict[Any, int] = {
            value: rank for rank, value in enumerate(unique_sorted)
        }
        # rank -> value
        self.reverse_map: list[Any] = unique_sorted.copy()
        # Invalid entries in input order (kept for reference only).
        self.invalid_values: list[Any] = invalid_values

    def compress(self, original: Any) -> int:
        """
        Compress a single value to its coordinate index.

        Args:
            original: The value to look up.

        Returns:
            int: The compressed index, or -1 if the value is invalid,
            unhashable, or not present in the mapping.

        >>> arr = [100, 10, 52, 83]
        >>> cc = CoordinateCompressor(arr)
        >>> cc.compress(10)
        0
        >>> cc.compress(7)
        -1
        >>> cc.compress(None)
        -1
        >>> cc.compress([1, 2])
        -1
        """
        if self._is_invalid(original):
            return -1
        try:
            return self.coordinate_map.get(original, -1)
        except TypeError:
            # Unhashable values (list, dict, ...) can never be keys of the
            # map, so they are reported as "not found" instead of raising.
            return -1

    def decompress(self, num: int) -> Any:
        """
        Decompress an integer coordinate back to its original value.

        Args:
            num: Compressed index to decompress.

        Returns:
            The original value for valid indices, otherwise -1.

        >>> arr = [100, 10, 52, 83]
        >>> cc = CoordinateCompressor(arr)
        >>> cc.decompress(0)
        10
        >>> cc.decompress(5)
        -1
        >>> cc.decompress(-1)
        -1
        """
        # Negative indices are rejected explicitly so they do not wrap around.
        if 0 <= num < len(self.reverse_map):
            return self.reverse_map[num]
        return -1
if __name__ == "__main__":
    import doctest

    # Run the examples embedded in the docstrings first.
    doctest.testmod()

    # Small demo: round-trip every input value, including the invalid ones.
    arr: list[Any] = [100, 10, 52, 83, None, float("nan")]
    cc = CoordinateCompressor(arr)

    print("Coordinate Compression Demo:\n")
    indices = [cc.compress(value) for value in arr]
    for value, index in zip(arr, indices):
        restored = cc.decompress(index)
        row = (
            f"Original: {value!r:>6} | "
            f"Compressed: {index:>2} | "
            f"Decompressed: {restored!r}"
        )
        print(row)