From e86f55fd7f0642ed03d3c3014072b3b134b368ef Mon Sep 17 00:00:00 2001
From: DevForge Engineer
Date: Mon, 18 May 2026 02:20:42 -0400
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20restore=20npm-publish.yml=20?=
 =?UTF-8?q?=E2=80=94=20repo=20has=20legitimate=20npm=20wrapper?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The npm-publish.yml was incorrectly removed in PR #13 (alongside
legitimate ruff fixes). This repo has a legitimate npm wrapper
(package.json + cli.js) that allows users to install via
'npm install -g datamorph-cli'. The npm-publish workflow is needed to
publish this wrapper to the npm registry.

Reverts the npm-publish.yml removal from commit 8a7edca.
---
Reviewer notes (this region, between the "---" line and the first
"diff --git" line, is not part of the commit message):

- Purpose: adds .github/workflows/npm-publish.yml as a new 27-line file.
- The workflow is triggered by `release` (types: [published]) and by
  `workflow_dispatch`. Its single job checks out the repository, sets up
  Node.js 22 against https://registry.npmjs.org, and runs
  `npm publish --access public` with NODE_AUTH_TOKEN taken from
  `secrets.NPM_TOKEN`.
- Job permissions are limited to `contents: read`.
- PR #13, commit 8a7edca, package.json and cli.js are cited in the message
  but are not part of this patch -- TODO confirm the references.
- NOTE(review): the line structure of this series was reconstructed from a
  whitespace-collapsed copy. Indentation, blank context lines and message
  wrapping are inferred; the added-line count (27) matches the hunk header.
  Verify with `git apply --check` before applying.

 .github/workflows/npm-publish.yml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 .github/workflows/npm-publish.yml

diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml
new file mode 100644
index 0000000..cb14d77
--- /dev/null
+++ b/.github/workflows/npm-publish.yml
@@ -0,0 +1,27 @@
+name: Publish to npm
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version: "22"
+          registry-url: "https://registry.npmjs.org"
+
+      - name: Publish to npm
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+        run: |
+          npm publish --access public

From 973e23c07516924347e5cd2cc8b0bf4e8bfb6c3b Mon Sep 17 00:00:00 2001
From: DevForge Engineer
Date: Mon, 18 May 2026 02:33:37 -0400
Subject: [PATCH 2/2] fix: bump actions/checkout to v6 in pages.yml and add missing test coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix version mismatch: actions/checkout@v4 → @v6 in pages.yml
  (other workflows were already bumped via dependabot PR #2)
- Add TestJsonlConversion: JSONL format was completely untested
- Add TestBatchConversion: convert_batch() direct test coverage
- Add TestTypeInference: _infer_type() with bool, None, date edge cases
- Add TestTypeWidening: _widen_type() with all widening combinations
- Add CLI --stream flag and formats streaming display tests
---
Reviewer notes (this region, between the "---" line and the first
"diff --git" line, is not part of the commit message):

- Purpose: one-line action bump in pages.yml plus 158 added lines in
  tests/test_converters.py (hunks add 5 + 15 + 138 lines, matching the
  diffstat below).
- The import hunk adds `ConversionResult` and `get_writer`, but no line
  added by this patch references either name. Presumably unused imports
  (ruff F401) -- TODO confirm against the rest of tests/test_converters.py.
- test_formats_show_streaming: the inline comment says jsonl is listed as
  streaming-capable, but the assertion only checks that the substring
  "jsonl" appears in the `formats` output.
- test_convert_with_stream_flag checks the exit code and that "Converted"
  is printed; it does not read the output file.
- test_batch_single_file asserts `len(results) >= 1` and then inspects only
  `results[0]`, so extra results would go unnoticed.
- NOTE(review): the line structure of this series was reconstructed from a
  whitespace-collapsed copy. Indentation, blank context lines and message
  wrapping are inferred from the "@@" counts. Verify with
  `git apply --check` before applying.

 .github/workflows/pages.yml |   2 +-
 tests/test_converters.py    | 158 ++++++++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index d2d8ccb..29402d3 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -18,7 +18,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Pages
         uses: actions/configure-pages@v5
       - name: Build with Jekyll
diff --git a/tests/test_converters.py b/tests/test_converters.py
index 9ec63a3..e0c70bc 100644
--- a/tests/test_converters.py
+++ b/tests/test_converters.py
@@ -9,9 +9,14 @@
 
 from datamorph.cli import cli
 from datamorph.converters import (
+    ConversionResult,
+    _infer_type,
+    _widen_type,
     convert,
+    convert_batch,
     detect_format,
     get_reader,
+    get_writer,
     supported_formats,
 )
 
@@ -366,6 +371,21 @@ def test_batch_no_input(self, runner, tmp_path):
         ])
         assert result.exit_code == 0
 
+    def test_convert_with_stream_flag(self, runner, sample_csv, tmp_path):
+        output = tmp_path / "out.json"
+        result = runner.invoke(cli, [
+            "convert", str(sample_csv), str(output),
+            "--stream",
+        ])
+        assert result.exit_code == 0
+        assert "Converted" in result.output
+
+    def test_formats_show_streaming(self, runner):
+        result = runner.invoke(cli, ["formats"])
+        assert result.exit_code == 0
+        assert "csv" in result.output
+        assert "jsonl" in result.output  # jsonl listed as streaming-capable
+
 
 # ── Multi-format Roundtrips ──────────────────────────────────────────
 
@@ -410,3 +430,141 @@ def test_large_json_array(self, tmp_path):
 
         lines = output.read_text().strip().split("\n")
         assert len(lines) == 1001  # 1000 data + 1 header
+
+
+# ── JSONL (JSON Lines) ────────────────────────────────────────────────
+
+
+class TestJsonlConversion:
+    def test_jsonl_to_json(self, tmp_path):
+        path = tmp_path / "data.jsonl"
+        path.write_text(
+            json.dumps({"name": "Alice", "age": 30}) + "\n"
+            + json.dumps({"name": "Bob", "age": 25}) + "\n"
+        )
+        output = tmp_path / "out.json"
+        result = convert(path, output)
+        assert not result.errors
+        assert result.rows_written == 2
+        data = json.loads(output.read_text())
+        assert len(data) == 2
+        assert data[0]["name"] == "Alice"
+
+    def test_jsonl_to_csv(self, tmp_path):
+        path = tmp_path / "data.jsonl"
+        path.write_text(
+            json.dumps({"name": "Alice", "age": 30}) + "\n"
+            + json.dumps({"name": "Bob", "age": 25}) + "\n"
+        )
+        output = tmp_path / "out.csv"
+        result = convert(path, output)
+        assert not result.errors
+        assert result.rows_written == 2
+        content = output.read_text()
+        assert "Alice" in content
+        assert "name" in content
+
+    def test_csv_to_jsonl(self, sample_csv, tmp_path):
+        output = tmp_path / "out.jsonl"
+        result = convert(sample_csv, output)
+        assert not result.errors
+        assert result.rows_written == 3
+        lines = output.read_text().strip().split("\n")
+        assert len(lines) == 3
+        data = json.loads(lines[0])
+        assert data["name"] == "Alice"
+
+    def test_jsonl_empty(self, tmp_path):
+        path = tmp_path / "empty.jsonl"
+        path.write_text("")
+        output = tmp_path / "out.json"
+        result = convert(path, output)
+        assert not result.errors
+        assert result.rows_written == 0
+
+
+# ── Batch Conversion ──────────────────────────────────────────────────
+
+
+class TestBatchConversion:
+    def test_batch_single_file(self, sample_csv, tmp_path):
+        input_dir = sample_csv.parent
+        output_dir = tmp_path / "batch_out"
+        results = convert_batch(
+            str(input_dir), str(output_dir),
+            "csv", "json", pattern="test.csv",
+        )
+        assert len(results) >= 1
+        assert not results[0].errors
+        assert results[0].rows_written == 3
+        assert (output_dir / "test.json").exists()
+
+    def test_batch_no_matches(self, tmp_path):
+        input_dir = tmp_path / "empty_dir"
+        input_dir.mkdir()
+        output_dir = tmp_path / "batch_out"
+        results = convert_batch(
+            str(input_dir), str(output_dir),
+            "csv", "json", pattern="*.csv",
+        )
+        assert results == []
+
+
+# ── Type inference ────────────────────────────────────────────────────
+
+
+class TestTypeInference:
+    def test_infer_bool(self):
+        assert _infer_type(True) == "bool"
+        assert _infer_type(False) == "bool"
+
+    def test_infer_int(self):
+        assert _infer_type(42) == "int64"
+        assert _infer_type(0) == "int64"
+        assert _infer_type(-1) == "int64"
+
+    def test_infer_float(self):
+        assert _infer_type(3.14) == "float64"
+        assert _infer_type(0.0) == "float64"
+
+    def test_infer_string(self):
+        assert _infer_type("hello") == "string"
+        assert _infer_type("") == "string"
+
+    def test_infer_date_string(self):
+        assert _infer_type("2024-01-15") == "date"
+        assert _infer_type("2026-05-18") == "date"
+
+    def test_infer_none(self):
+        assert _infer_type(None) == "null"
+
+    def test_infer_other(self):
+        assert _infer_type([1, 2, 3]) == "string"
+        assert _infer_type({"key": "val"}) == "string"
+
+
+class TestTypeWidening:
+    def test_widen_identical(self):
+        assert _widen_type("int64", "int64") == "int64"
+        assert _widen_type("string", "string") == "string"
+
+    def test_widen_int_to_float(self):
+        assert _widen_type("int64", "float64") == "float64"
+        assert _widen_type("float64", "int64") == "float64"
+
+    def test_widen_to_string(self):
+        assert _widen_type("int64", "string") == "string"
+        assert _widen_type("string", "int64") == "string"
+        assert _widen_type("float64", "string") == "string"
+        assert _widen_type("bool", "string") == "string"
+
+    def test_widen_from_null(self):
+        assert _widen_type("null", "int64") == "int64"
+        assert _widen_type("null", "float64") == "float64"
+        assert _widen_type("null", "string") == "string"
+        assert _widen_type("null", "bool") == "bool"
+        assert _widen_type("null", "date") == "date"
+
+    def test_widen_unrelated(self):
+        assert _widen_type("date", "int64") == "string"
+        assert _widen_type("int64", "date") == "string"