Skip to content

Commit 4db102a

Browse files
Change lists CLI to use single JSONL files instead of directories.
1 parent 95772de commit 4db102a

File tree

11 files changed: 247 additions and 186 deletions.

bead/cli/deployment.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,9 @@ def deployment() -> None:
5454
@click.argument(
5555
"lists_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
5656
)
57-
@click.argument("items_file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
57+
@click.argument(
58+
"items_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
59+
)
5860
@click.argument("output_dir", type=click.Path(path_type=Path))
5961
@click.option(
6062
"--experiment-type",

bead/cli/lists.py

Lines changed: 66 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,10 @@ def lists() -> None:
4040

4141

4242
@click.command()
43-
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
44-
@click.argument("output_dir", type=click.Path(path_type=Path))
43+
@click.argument(
44+
"items_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
45+
)
46+
@click.argument("output_file", type=click.Path(dir_okay=False, path_type=Path))
4547
@click.option(
4648
"--strategy",
4749
type=click.Choice(["balanced", "random", "stratified"]),
@@ -88,7 +90,7 @@ def lists() -> None:
8890
def partition(
8991
ctx: click.Context,
9092
items_file: Path,
91-
output_dir: Path,
93+
output_file: Path,
9294
strategy: str,
9395
n_lists: int,
9496
list_constraint_files: tuple[Path, ...],
@@ -104,9 +106,9 @@ def partition(
104106
ctx : click.Context
105107
Click context object.
106108
items_file : Path
107-
Path to items file.
108-
output_dir : Path
109-
Output directory for list files.
109+
Path to items JSONL file.
110+
output_file : Path
111+
Output JSONL file for experiment lists (one list per line).
110112
strategy : str
111113
Partitioning strategy.
112114
n_lists : int
@@ -125,24 +127,25 @@ def partition(
125127
Examples
126128
--------
127129
# Balanced partitioning
128-
$ bead lists partition items.jsonl lists/ --n-lists 5 --strategy balanced
130+
$ bead lists partition items.jsonl lists.jsonl --n-lists 5 --strategy balanced
129131
130132
# With list constraints
131-
$ bead lists partition items.jsonl lists/ --n-lists 5 \\
133+
$ bead lists partition items.jsonl lists.jsonl --n-lists 5 \\
132134
--list-constraints constraints/unique.jsonl
133135
134136
# With batch constraints
135-
$ bead lists partition items.jsonl lists/ --n-lists 5 \\
137+
$ bead lists partition items.jsonl lists.jsonl --n-lists 5 \\
136138
--batch-constraints constraints/coverage.jsonl
137139
138140
# With both constraint types
139-
$ bead lists partition items.jsonl lists/ --n-lists 5 \\
141+
$ bead lists partition items.jsonl lists.jsonl --n-lists 5 \\
140142
--list-constraints constraints/unique.jsonl constraints/balance.jsonl \\
141143
--batch-constraints constraints/coverage.jsonl \\
142144
--max-iterations 10000
143145
144146
# Dry run to preview
145-
$ bead lists partition items.jsonl lists/ --n-lists 5 --strategy balanced --dry-run
147+
$ bead lists partition items.jsonl lists.jsonl \\
148+
--n-lists 5 --strategy balanced --dry-run
146149
"""
147150
try:
148151
if n_lists < 1:
@@ -248,25 +251,26 @@ def partition(
248251

249252
# Save lists (or show dry-run preview)
250253
if dry_run:
251-
print_info("[DRY RUN] Would create the following files:")
254+
print_info(f"[DRY RUN] Would write {len(experiment_lists)} lists to:")
255+
console.print(f" [dim]{output_file}[/dim]")
252256
for exp_list in experiment_lists:
253-
list_file = output_dir / f"list_{exp_list.list_number}.jsonl"
254257
console.print(
255-
f" [dim]{list_file}[/dim] ({len(exp_list.item_refs)} items)"
258+
f" list_{exp_list.list_number}: {len(exp_list.item_refs)} items"
256259
)
257260
print_info(
258261
f"[DRY RUN] Total: {len(experiment_lists)} lists, {len(items)} items"
259262
)
260263
else:
261-
output_dir.mkdir(parents=True, exist_ok=True)
262-
for exp_list in experiment_lists:
263-
list_file = output_dir / f"list_{exp_list.list_number}.jsonl"
264-
with open(list_file, "w", encoding="utf-8") as f:
264+
# Ensure parent directory exists
265+
output_file.parent.mkdir(parents=True, exist_ok=True)
266+
# Write all lists to single JSONL file (one list per line)
267+
with open(output_file, "w", encoding="utf-8") as f:
268+
for exp_list in experiment_lists:
265269
f.write(exp_list.model_dump_json() + "\n")
266270

267271
print_success(
268272
f"Created {len(experiment_lists)} lists "
269-
f"with {len(items)} items: {output_dir}"
273+
f"with {len(items)} items: {output_file}"
270274
)
271275

272276
# Show distribution
@@ -285,71 +289,48 @@ def partition(
285289

286290

287291
@click.command(name="list")
288-
@click.option(
289-
"--directory",
290-
type=click.Path(exists=True, file_okay=False, path_type=Path),
291-
default=Path.cwd(),
292-
help="Directory to search for list files",
293-
)
294-
@click.option(
295-
"--pattern",
296-
default="*.jsonl",
297-
help="File pattern to match (default: *.jsonl)",
292+
@click.argument(
293+
"lists_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
298294
)
299295
@click.pass_context
300296
def list_lists(
301297
ctx: click.Context,
302-
directory: Path,
303-
pattern: str,
298+
lists_file: Path,
304299
) -> None:
305-
"""List experiment list files in a directory.
300+
"""List experiment lists in a JSONL file.
306301
307302
Parameters
308303
----------
309304
ctx : click.Context
310305
Click context object.
311-
directory : Path
312-
Directory to search.
313-
pattern : str
314-
File pattern to match.
306+
lists_file : Path
307+
JSONL file containing experiment lists (one list per line).
315308
316309
Examples
317310
--------
318-
$ bead lists list
319-
$ bead lists list --directory experiment_lists/
320-
$ bead lists list --pattern "list_*.jsonl"
311+
$ bead lists list lists.jsonl
321312
"""
322313
try:
323-
files = list(directory.glob(pattern))
324-
325-
if not files:
326-
print_info(f"No files found in {directory} matching {pattern}")
327-
return
328-
329-
table = Table(title=f"Experiment Lists in {directory}")
330-
table.add_column("File", style="cyan")
314+
table = Table(title=f"Experiment Lists in {lists_file}")
331315
table.add_column("List #", justify="right", style="yellow")
316+
table.add_column("Name", style="cyan")
332317
table.add_column("Items", justify="right", style="green")
333-
table.add_column("Name", style="white")
334-
335-
for file_path in sorted(files):
336-
try:
337-
with open(file_path, encoding="utf-8") as f:
338-
first_line = f.readline().strip()
339-
if not first_line:
340-
continue
341-
342-
list_data = json.loads(first_line)
343-
exp_list = ExperimentList(**list_data)
344-
345-
table.add_row(
346-
str(file_path.name),
347-
str(exp_list.list_number),
348-
str(len(exp_list.item_refs)),
349-
exp_list.name,
350-
)
351-
except Exception:
352-
continue
318+
319+
with open(lists_file, encoding="utf-8") as f:
320+
for line in f:
321+
line = line.strip()
322+
if not line:
323+
continue
324+
try:
325+
list_data = json.loads(line)
326+
exp_list = ExperimentList(**list_data)
327+
table.add_row(
328+
str(exp_list.list_number),
329+
exp_list.name,
330+
str(len(exp_list.item_refs)),
331+
)
332+
except Exception:
333+
continue
353334

354335
console.print(table)
355336

@@ -405,43 +386,38 @@ def validate(ctx: click.Context, list_file: Path) -> None:
405386

406387
@click.command()
407388
@click.argument(
408-
"lists_dir", type=click.Path(exists=True, file_okay=False, path_type=Path)
389+
"lists_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
409390
)
410391
@click.pass_context
411-
def show_stats(ctx: click.Context, lists_dir: Path) -> None:
412-
"""Show statistics about experiment lists in a directory.
392+
def show_stats(ctx: click.Context, lists_file: Path) -> None:
393+
"""Show statistics about experiment lists in a JSONL file.
413394
414395
Parameters
415396
----------
416397
ctx : click.Context
417398
Click context object.
418-
lists_dir : Path
419-
Directory containing list files.
399+
lists_file : Path
400+
JSONL file containing experiment lists (one list per line).
420401
421402
Examples
422403
--------
423-
$ bead lists show-stats lists/
404+
$ bead lists show-stats lists.jsonl
424405
"""
425406
try:
426-
print_info(f"Analyzing experiment lists in: {lists_dir}")
427-
428-
list_files = list(lists_dir.glob("*.jsonl"))
429-
430-
if not list_files:
431-
print_error("No list files found")
432-
ctx.exit(1)
407+
print_info(f"Analyzing experiment lists in: {lists_file}")
433408

434409
lists_data: list[ExperimentList] = []
435-
for file_path in list_files:
436-
try:
437-
with open(file_path, encoding="utf-8") as f:
438-
first_line = f.readline().strip()
439-
if first_line:
440-
list_data = json.loads(first_line)
441-
exp_list = ExperimentList(**list_data)
442-
lists_data.append(exp_list)
443-
except Exception:
444-
continue
410+
with open(lists_file, encoding="utf-8") as f:
411+
for line in f:
412+
line = line.strip()
413+
if not line:
414+
continue
415+
try:
416+
list_data = json.loads(line)
417+
exp_list = ExperimentList(**list_data)
418+
lists_data.append(exp_list)
419+
except Exception:
420+
continue
445421

446422
if not lists_data:
447423
print_error("No valid experiment lists found")

bead/cli/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -398,7 +398,7 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None
398398

399399
def _lazy_load(self, cmd_name: str) -> click.Command:
400400
"""Import and return a lazy command."""
401-
import importlib
401+
import importlib # noqa: PLC0415
402402

403403
module_path, attr_name = self._lazy_subcommands[cmd_name]
404404
module = importlib.import_module(module_path)

bead/cli/utils.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,13 @@
2525
from bead.data.base import JsonValue
2626

2727

28-
def _load_config() -> "type":
28+
def _load_config() -> type:
2929
"""Lazily import load_config to avoid slow startup."""
30-
from bead.config import load_config
30+
from bead.config import load_config # noqa: PLC0415
3131

3232
return load_config
3333

34+
3435
console = Console()
3536

3637

bead/lists/list_collection.py

Lines changed: 78 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,13 @@
88
- Partitioning metadata tracking
99
- Coverage validation (ensuring all items are assigned exactly once)
1010
- List lookup by number
11+
- JSONL serialization (one list per line)
1112
"""
1213

1314
from __future__ import annotations
1415

16+
import json
17+
from pathlib import Path
1518
from typing import TYPE_CHECKING, TypedDict
1619
from uuid import UUID
1720

@@ -298,3 +301,78 @@ def validate_coverage(self, all_item_ids: set[UUID]) -> CoverageValidationResult
298301
"duplicate_items": duplicate_items,
299302
"total_assigned": sum(item_counts.values()),
300303
}
304+
305+
def to_jsonl(self, path: Path | str) -> None:
306+
"""Write lists to a JSONL file (one list per line).
307+
308+
Parameters
309+
----------
310+
path : Path | str
311+
Path to output JSONL file.
312+
313+
Examples
314+
--------
315+
>>> from uuid import uuid4
316+
>>> collection = ListCollection(
317+
... name="test",
318+
... source_items_id=uuid4(),
319+
... partitioning_strategy="balanced"
320+
... )
321+
>>> exp_list = ExperimentList(name="list_0", list_number=0)
322+
>>> collection.add_list(exp_list)
323+
>>> collection.to_jsonl("lists.jsonl") # doctest: +SKIP
324+
"""
325+
path = Path(path)
326+
path.parent.mkdir(parents=True, exist_ok=True)
327+
with open(path, "w", encoding="utf-8") as f:
328+
for exp_list in self.lists:
329+
f.write(exp_list.model_dump_json() + "\n")
330+
331+
@classmethod
332+
def from_jsonl(
333+
cls,
334+
path: Path | str,
335+
name: str = "loaded_lists",
336+
source_items_id: UUID | None = None,
337+
partitioning_strategy: str = "unknown",
338+
) -> ListCollection:
339+
"""Load lists from a JSONL file (one list per line).
340+
341+
Parameters
342+
----------
343+
path : Path | str
344+
Path to JSONL file containing experiment lists.
345+
name : str
346+
Name for the collection (default: "loaded_lists").
347+
source_items_id : UUID | None
348+
Source items UUID. If None, uses a nil UUID.
349+
partitioning_strategy : str
350+
Strategy name (default: "unknown").
351+
352+
Returns
353+
-------
354+
ListCollection
355+
Collection containing the loaded lists.
356+
357+
Examples
358+
--------
359+
>>> collection = ListCollection.from_jsonl("lists.jsonl") # doctest: +SKIP
360+
"""
361+
path = Path(path)
362+
lists: list[ExperimentList] = []
363+
364+
with open(path, encoding="utf-8") as f:
365+
for line in f:
366+
line = line.strip()
367+
if not line:
368+
continue
369+
list_data = json.loads(line)
370+
exp_list = ExperimentList(**list_data)
371+
lists.append(exp_list)
372+
373+
return cls(
374+
name=name,
375+
source_items_id=source_items_id or UUID(int=0),
376+
lists=lists,
377+
partitioning_strategy=partitioning_strategy,
378+
)

0 commit comments

Comments
 (0)