Skip to content

Commit 77a7c36

Browse files
Feat: add support for CSV input data loading from file or inline (#2640)
1 parent 6af3187 commit 77a7c36

File tree

2 files changed: +224 -8 lines changed

sqlmesh/core/test/definition.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
import numpy as np
1212
import pandas as pd
13+
from io import StringIO
1314
from freezegun import freeze_time
1415
from pandas.api.types import is_object_dtype
1516
from sqlglot import Dialect, exp
@@ -24,6 +25,7 @@
2425
from sqlmesh.utils import UniqueKeyDict, random_id, type_is_known, yaml
2526
from sqlmesh.utils.date import pandas_timestamp_to_pydatetime
2627
from sqlmesh.utils.errors import ConfigError, TestError
28+
from sqlmesh.utils.yaml import load as yaml_load
2729

2830
if t.TYPE_CHECKING:
2931
from sqlglot.dialects.dialect import DialectType
@@ -328,6 +330,17 @@ def _normalize_rows(
328330
rows = values.get("rows")
329331
query = values.get("query")
330332

333+
format = values.get("format")
334+
path = values.get("path")
335+
if format == "csv":
336+
rows = pd.read_csv(path or StringIO(rows)).to_dict(orient="records")
337+
elif format in (None, "yaml"):
338+
if path:
339+
input_rows = yaml_load(Path(path))
340+
rows = input_rows.get("rows") if isinstance(input_rows, dict) else input_rows
341+
else:
342+
_raise_error(f"Unsupported data format '{format}' for '{name}'", self.path)
343+
331344
if query is not None:
332345
if rows is not None:
333346
_raise_error(

tests/core/test_test.py

Lines changed: 211 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -345,20 +345,34 @@ def test_row_order(sushi_context: Context, full_model_without_ctes: SqlModel) ->
345345
)
346346

347347

348-
def test_partial_data(sushi_context: Context) -> None:
348+
@pytest.mark.parametrize(
349+
"waiter_names_input",
350+
[
351+
"""sushi.waiter_names:
352+
- id: 1
353+
- id: 2
354+
name: null
355+
- id: 3
356+
name: 'bob'
357+
""",
358+
"""sushi.waiter_names:
359+
format: csv
360+
rows: |
361+
id,name
362+
1,
363+
2,null
364+
3,bob""",
365+
],
366+
)
367+
def test_partial_data(sushi_context: Context, waiter_names_input: str) -> None:
349368
_check_successful_or_raise(
350369
_create_test(
351370
body=load_yaml(
352-
"""
371+
f"""
353372
test_foo:
354373
model: sushi.foo
355374
inputs:
356-
sushi.waiter_names:
357-
- id: 1
358-
- id: 2
359-
name: null
360-
- id: 3
361-
name: 'bob'
375+
{waiter_names_input}
362376
outputs:
363377
ctes:
364378
source:
@@ -390,6 +404,195 @@ def test_partial_data(sushi_context: Context) -> None:
390404
)
391405

392406

407+
@pytest.mark.parametrize(
408+
"waiter_names_input",
409+
[
410+
"""sushi.waiter_names:
411+
format: yaml
412+
rows:
413+
- id: 1
414+
name: alice
415+
- id: 2
416+
name: 'bob'
417+
""",
418+
"""sushi.waiter_names:
419+
format: csv
420+
rows: |
421+
id,name
422+
1,alice
423+
2,bob""",
424+
],
425+
)
426+
def test_format_inline(sushi_context: Context, waiter_names_input: str) -> None:
427+
_check_successful_or_raise(
428+
_create_test(
429+
body=load_yaml(
430+
f"""
431+
test_foo:
432+
model: sushi.foo
433+
inputs:
434+
{waiter_names_input}
435+
outputs:
436+
query:
437+
- id: 1
438+
name: alice
439+
- id: 2
440+
name: 'bob'
441+
"""
442+
),
443+
test_name="test_foo",
444+
model=sushi_context.upsert_model(
445+
_create_model(
446+
"SELECT id, name FROM sushi.waiter_names ",
447+
default_catalog=sushi_context.default_catalog,
448+
)
449+
),
450+
context=sushi_context,
451+
).run()
452+
)
453+
454+
455+
@pytest.mark.parametrize(
456+
["input_data", "filename", "file_data"],
457+
[
458+
[
459+
"""sushi.waiter_names:
460+
format: yaml
461+
path: """,
462+
"test_data.yaml",
463+
"""- id: 1
464+
name: alice
465+
- id: 2
466+
name: 'bob'
467+
""",
468+
],
469+
[
470+
"""sushi.waiter_names:
471+
path: """,
472+
"test_data.yaml",
473+
"""rows:
474+
- id: 1
475+
name: alice
476+
- id: 2
477+
name: 'bob'
478+
""",
479+
],
480+
[
481+
"""sushi.waiter_names:
482+
format: csv
483+
path: """,
484+
"test_data.csv",
485+
"""id,name
486+
1,alice
487+
2,bob""",
488+
],
489+
],
490+
)
491+
def test_format_path(
492+
sushi_context: Context, tmp_path: Path, input_data: str, filename: str, file_data: str
493+
) -> None:
494+
test_csv_file = tmp_path / filename
495+
test_csv_file.write_text(file_data)
496+
497+
_check_successful_or_raise(
498+
_create_test(
499+
body=load_yaml(
500+
f"""
501+
test_foo:
502+
model: sushi.foo
503+
inputs:
504+
{input_data}{str(test_csv_file)}
505+
outputs:
506+
query:
507+
- id: 1
508+
name: alice
509+
- id: 2
510+
name: 'bob'
511+
"""
512+
),
513+
test_name="test_foo",
514+
model=sushi_context.upsert_model(
515+
_create_model(
516+
"SELECT id, name FROM sushi.waiter_names ",
517+
default_catalog=sushi_context.default_catalog,
518+
)
519+
),
520+
context=sushi_context,
521+
).run()
522+
)
523+
524+
525+
def test_unsupported_format_failure(
526+
sushi_context: Context, full_model_without_ctes: SqlModel
527+
) -> None:
528+
with pytest.raises(
529+
TestError,
530+
match="Unsupported data format 'xml' for 'sushi.waiter_names'",
531+
):
532+
_create_test(
533+
body=load_yaml(
534+
"""
535+
test_foo:
536+
model: sushi.foo
537+
description: XML format isn't supported to load data (fails intentionally)
538+
inputs:
539+
sushi.waiter_names:
540+
format: xml
541+
path: 'test_data.xml'
542+
outputs:
543+
query:
544+
- id: 1
545+
value: null
546+
"""
547+
),
548+
test_name="test_foo",
549+
model=sushi_context.upsert_model(full_model_without_ctes),
550+
context=sushi_context,
551+
)
552+
553+
with pytest.raises(
554+
TestError,
555+
match="Unsupported data format 'xml' for 'sushi.waiter_names'",
556+
):
557+
_create_test(
558+
body=load_yaml(
559+
"""
560+
test_foo:
561+
model: sushi.foo
562+
description: XML without path doesn't raise error
563+
inputs:
564+
sushi.waiter_names:
565+
format: xml
566+
rows: |
567+
<rows>
568+
<row>
569+
<id>1</id>
570+
<name>alice</name>
571+
</row>
572+
<row>
573+
<id>2</id>
574+
<name>bob</name>
575+
</row>
576+
</rows>
577+
outputs:
578+
query:
579+
- id: 1
580+
name: alice
581+
- id: 2
582+
name: 'bob'
583+
"""
584+
),
585+
test_name="test_foo",
586+
model=sushi_context.upsert_model(
587+
_create_model(
588+
"SELECT id, name FROM sushi.waiter_names ",
589+
default_catalog=sushi_context.default_catalog,
590+
)
591+
),
592+
context=sushi_context,
593+
)
594+
595+
393596
def test_partial_output_columns() -> None:
394597
_check_successful_or_raise(
395598
_create_test(

0 commit comments

Comments
 (0)