Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 27561c6

Browse files
committed
Refactor: split_key_space() now also returns start & end of range
1 parent 8443265 commit 27561c6

File tree

1 file changed

+8
-9
lines changed

1 file changed

+8
-9
lines changed

data_diff/table_segment.py

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,17 +14,18 @@
1414

1515
RECOMMENDED_CHECKSUM_DURATION = 20
1616

17-
def split_key_space(min_key, max_key, count):
17+
def split_key_space(min_key: DbKey, max_key: DbKey, count: int):
1818
if max_key - min_key <= count:
1919
count = 1
2020

2121
if isinstance(min_key, ArithString):
2222
assert type(min_key) is type(max_key)
2323
checkpoints = min_key.range(max_key, count)
24-
assert all(min_key <= x <= max_key for x in checkpoints)
25-
return checkpoints
24+
else:
25+
checkpoints = split_space(min_key, max_key, count)
2626

27-
return split_space(min_key, max_key, count)
27+
assert all(min_key < x < max_key for x in checkpoints)
28+
return [min_key] + checkpoints + [max_key]
2829

2930

3031

@@ -128,7 +129,7 @@ def get_values(self) -> list:
128129
return self.database.query(select, List[Tuple])
129130

130131
def choose_checkpoints(self, count: int) -> List[DbKey]:
131-
"Suggests a bunch of evenly-spaced checkpoints to split by (not including start, end)"
132+
"Suggests a bunch of evenly-spaced checkpoints to split by, including start, end."
132133

133134
assert self.is_bounded
134135
return split_key_space(self.min_key, self.max_key, count)
@@ -137,12 +138,10 @@ def segment_by_checkpoints(self, checkpoints: List[DbKey]) -> List["TableSegment
137138
"Split the current TableSegment to a bunch of smaller ones, separated by the given checkpoints"
138139

139140
if self.min_key and self.max_key:
140-
assert all(self.min_key <= c < self.max_key for c in checkpoints)
141-
checkpoints.sort()
141+
assert all(self.min_key <= c <= self.max_key for c in checkpoints)
142142

143143
# Calculate sub-segments
144-
positions = [self.min_key] + checkpoints + [self.max_key]
145-
ranges = list(zip(positions[:-1], positions[1:]))
144+
ranges = list(zip(checkpoints[:-1], checkpoints[1:]))
146145

147146
# Create table segments
148147
tables = [self.new(min_key=s, max_key=e) for s, e in ranges]

0 commit comments

Comments
 (0)