|
9 | 9 | import torch.nn as nn |
10 | 10 |
|
11 | 11 | from integrations.hf.model_infer import ( |
| 12 | + GreenTextStreamer, |
12 | 13 | RSRLinear, |
13 | 14 | _bitnet_act_quant, |
14 | 15 | _detect_device_from_dir, |
| 16 | + _print_inference_stats, |
15 | 17 | _resolve_module, |
16 | 18 | _set_module, |
17 | 19 | parse_args, |
@@ -405,3 +407,154 @@ def test_default_mode_is_multiply(self): |
405 | 407 | ws = torch.tensor([3.0]) |
406 | 408 | layer = RSRLinear("test", meta, arrays, weight_scale=ws) |
407 | 409 | assert layer._weight_scale_mode == "multiply" |
| 410 | + |
| 411 | + |
| 412 | +# --------------------------------------------------------------------------- |
| 413 | +# GreenTextStreamer |
| 414 | +# --------------------------------------------------------------------------- |
| 415 | + |
class TestGreenTextStreamer:
    """Tests for GreenTextStreamer's ANSI-green token output."""

    # Loading bert-base-uncased from the HF hub is slow and may hit the
    # network, so the tokenizer is built once and cached at class scope
    # instead of being re-downloaded by every test.
    _tokenizer = None

    @classmethod
    def _streamer(cls):
        """Return a GreenTextStreamer backed by the shared cached tokenizer."""
        from transformers import AutoTokenizer

        if cls._tokenizer is None:
            cls._tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        return GreenTextStreamer(
            cls._tokenizer, skip_prompt=True, skip_special_tokens=True
        )

    def test_output_wrapped_in_green(self, capsys):
        """on_finalized_text prints text wrapped in ANSI green codes."""
        streamer = self._streamer()
        streamer.on_finalized_text("hello", stream_end=False)
        captured = capsys.readouterr()
        assert "\033[32m" in captured.out
        assert "hello" in captured.out
        assert "\033[0m" in captured.out

    def test_stream_end_adds_newline(self, capsys):
        """stream_end=True terminates the stream with a trailing newline."""
        streamer = self._streamer()
        streamer.on_finalized_text("done", stream_end=True)
        captured = capsys.readouterr()
        assert captured.out.endswith("\n")

    def test_green_codes_present_without_tokenizer(self, capsys):
        """Each finalized chunk gets its own green/reset pair, regardless of
        token content.

        NOTE(review): despite the legacy name, this test does use a tokenizer
        (the streamer requires one); the name is kept so existing test
        selections keep working.
        """
        streamer = self._streamer()
        for token in ["The", " quick", " brown", " fox"]:
            streamer.on_finalized_text(token, stream_end=False)
        captured = capsys.readouterr()
        assert captured.out.count("\033[32m") == 4
        assert captured.out.count("\033[0m") == 4
| 450 | + |
| 451 | + |
| 452 | +# --------------------------------------------------------------------------- |
| 453 | +# _print_inference_stats |
| 454 | +# --------------------------------------------------------------------------- |
| 455 | + |
class TestPrintInferenceStats:
    """Tests for the _print_inference_stats summary table."""

    def test_contains_required_fields(self, capsys):
        """The table labels every stat: token count, elapsed time, throughput."""
        _print_inference_stats(n_tokens=42, elapsed=1.234)
        out = capsys.readouterr().out
        for label in ("tokens", "time", "tok/s"):
            assert label in out

    def test_token_count_displayed(self, capsys):
        """The raw token count appears in the output."""
        _print_inference_stats(n_tokens=100, elapsed=2.0)
        assert "100" in capsys.readouterr().out

    def test_elapsed_time_displayed(self, capsys):
        """Elapsed time is rendered to millisecond precision with an 's' unit."""
        _print_inference_stats(n_tokens=10, elapsed=3.5)
        assert "3.500 s" in capsys.readouterr().out

    def test_throughput_displayed(self, capsys):
        """Throughput is tokens per elapsed second (50 / 2.0 -> 25.0)."""
        _print_inference_stats(n_tokens=50, elapsed=2.0)
        assert "25.0" in capsys.readouterr().out

    def test_zero_elapsed_no_crash(self, capsys):
        """A zero elapsed time must not raise; throughput degrades to 'inf'."""
        _print_inference_stats(n_tokens=10, elapsed=0.0)
        out = capsys.readouterr().out
        assert "tok/s" in out
        assert "inf" in out

    def test_table_borders(self, capsys):
        """The stats are framed with Unicode box-drawing characters."""
        _print_inference_stats(n_tokens=5, elapsed=0.5)
        out = capsys.readouterr().out
        for border in ("┌", "┐", "└", "┘", "│"):
            assert border in out

    def test_output_is_bold_cyan(self, capsys):
        """Output is styled bold cyan and reset afterwards."""
        _print_inference_stats(n_tokens=10, elapsed=1.0)
        out = capsys.readouterr().out
        assert "\033[1;36m" in out  # bold cyan
        assert "\033[0m" in out  # reset after each line
| 497 | + |
| 498 | + |
| 499 | +# --------------------------------------------------------------------------- |
| 500 | +# Stream header ("▶ response") |
| 501 | +# --------------------------------------------------------------------------- |
| 502 | + |
class TestStreamHeader:
    """Tests for the bold-cyan '▶ response' header printed by generate_text."""

    @staticmethod
    def _stubs(fake_ids):
        """Build (model, tokenizer) stubs so generate_text can run without a
        real HF model/tokenizer; model.generate returns *fake_ids* verbatim.

        Extracted once here — the two tests below previously duplicated these
        stub classes verbatim, which risked them drifting apart.
        """

        class _FakeTokenizer:
            pad_token_id = 0

            def __call__(self, prompt, return_tensors):
                # 1-token "prompt" sliced off the front of the fake output.
                return {"input_ids": fake_ids[:, :1]}

            def decode(self, ids, skip_special_tokens):
                return "ok"

        class _FakeModel(torch.nn.Module):
            def parameters(self):
                return iter([torch.empty(1)])

            def generate(self, **kwargs):
                return fake_ids

        return _FakeModel(), _FakeTokenizer()

    def test_header_printed_before_tokens(self, capsys, monkeypatch):
        """generate_text prints a bold-cyan '▶ response' line when stream=True."""
        import integrations.hf.model_infer as mi

        model, tokenizer = self._stubs(torch.tensor([[1, 2, 3, 4]]))
        # Silence the stats table so only the header/stream output is captured.
        monkeypatch.setattr(mi, "_print_inference_stats", lambda *a, **k: None)

        mi.generate_text(
            model, tokenizer, "hi",
            use_chat_template=False, stream=True,
        )
        out = capsys.readouterr().out
        assert "▶ response" in out
        assert "\033[1;36m" in out

    def test_header_absent_without_stream(self, capsys, monkeypatch):
        """No header is printed when stream=False."""
        import integrations.hf.model_infer as mi

        model, tokenizer = self._stubs(torch.tensor([[1, 2]]))
        monkeypatch.setattr(mi, "_print_inference_stats", lambda *a, **k: None)

        mi.generate_text(
            model, tokenizer, "hi",
            use_chat_template=False, stream=False,
        )
        out = capsys.readouterr().out
        assert "▶ response" not in out
0 commit comments