From 7b4c635dd97300c11855db56a44856d1a90a5ff6 Mon Sep 17 00:00:00 2001
From: DhanashreePetare <dhanashreepetare8125@gmail.com>
Date: Fri, 5 Jun 2026 15:28:17 +0530
Subject: [PATCH 1/7] gsoc26: Layer 2 with tests initial commit

---
 databusclient/api/convert.py  | 383 ++++++++++++++++++++++++++++++++++
 databusclient/api/download.py |  77 +++++++
 databusclient/cli.py          |  11 +
 run_all_conversion_tests.py   | 338 ++++++++++++++++++++++++++++++
 tests/test_conversion.py      | 309 +++++++++++++++++++++++++++
 5 files changed, 1118 insertions(+)
 create mode 100644 databusclient/api/convert.py
 create mode 100644 run_all_conversion_tests.py
 create mode 100644 tests/test_conversion.py

diff --git a/databusclient/api/convert.py b/databusclient/api/convert.py
new file mode 100644
index 0000000..8d28fd8
--- /dev/null
+++ b/databusclient/api/convert.py
@@ -0,0 +1,383 @@
+"""Format and Mapping Conversion Layer.
+
+Layer 2: Within-class format conversion (lossless).
+Layer 3: Cross-class mapping conversion (quasi-equal for RDF <-> Tabular).
+"""
+
+import csv
+import json
+import os
+from typing import Optional
+
+from rdflib import Dataset, Graph
+
+
+# ---------------------------------------------------------------------------
+# Format registries
+# ---------------------------------------------------------------------------
+
+# CLI format name -> rdflib format string (triple serializations)
+RDF_TRIPLE_FORMATS = {
+    "ntriples": "ntriples",
+    "turtle": "turtle",
+    "rdf-xml": "xml",
+}
+
+# CLI format name -> rdflib format string (quad serializations)
+RDF_QUAD_FORMATS = {
+    "nquads": "nquads",
+    "trig": "trig",
+    "trix": "trix",
+    "json-ld": "json-ld",
+}
+
+# CLI format name -> field delimiter
+TABULAR_FORMATS = {"csv": ",", "tsv": "\t"}
+
+# Every supported CLI format name, in registry order
+ALL_FORMATS = [
+    *RDF_TRIPLE_FORMATS,
+    *RDF_QUAD_FORMATS,
+    *TABULAR_FORMATS,
+]
+
+# File extension (lower-case, with leading dot) -> CLI format name
+EXTENSION_TO_FORMAT = {
+    ".ttl": "turtle",
+    ".nt": "ntriples",
+    ".rdf": "rdf-xml",
+    ".xml": "rdf-xml",
+    ".owl": "rdf-xml",
+    ".nq": "nquads",
+    ".trig": "trig",
+    ".trix": "trix",
+    ".jsonld": "json-ld",
+    ".json": "json-ld",
+    ".csv": "csv",
+    ".tsv": "tsv",
+}
+
+
+# ---------------------------------------------------------------------------
+# Format detection
+# ---------------------------------------------------------------------------
+
+def detect_format_from_filename(filename: str) -> Optional[str]:
+    """Infer the CLI format name of a file from its extension.
+
+    Matching is case-insensitive, and one trailing compression extension
+    (.bz2, .gz or .xz) is ignored, so 'data.ttl.gz' is detected as turtle.
+
+    Args:
+        filename: File name or path.
+
+    Returns:
+        Format name string, or None when no known extension matches.
+    """
+    lowered = filename.lower()
+
+    # drop at most one compression suffix before looking at the format
+    packed = next((c for c in (".bz2", ".gz", ".xz") if lowered.endswith(c)), None)
+    if packed is not None:
+        lowered = lowered[: -len(packed)]
+
+    # longest extensions are tried first
+    by_length = sorted(EXTENSION_TO_FORMAT, key=len, reverse=True)
+    matched = next((e for e in by_length if lowered.endswith(e)), None)
+    return None if matched is None else EXTENSION_TO_FORMAT[matched]
+
+
+def get_format_class(fmt: str) -> str:
+    """Look up which equivalence class a format name belongs to.
+
+    Args:
+        fmt: Format name (e.g. 'turtle', 'nquads', 'csv').
+
+    Returns:
+        'triples', 'quads', or 'tabular'.
+
+    Raises:
+        ValueError: If format is not in any of the format registries.
+    """
+    registries = (
+        ("triples", RDF_TRIPLE_FORMATS),
+        ("quads", RDF_QUAD_FORMATS),
+        ("tabular", TABULAR_FORMATS),
+    )
+    for class_name, registry in registries:
+        if fmt in registry:
+            return class_name
+    raise ValueError(f"Unknown format: '{fmt}'. Supported formats: {ALL_FORMATS}")
+
+
+# ---------------------------------------------------------------------------
+# Output filename helper
+# ---------------------------------------------------------------------------
+
+# Maps format name -> file extension
+FORMAT_TO_EXTENSION = {
+    "ntriples": ".nt",
+    "turtle": ".ttl",
+    "rdf-xml": ".rdf",
+    "nquads": ".nq",
+    "trig": ".trig",
+    "trix": ".trix",
+    "json-ld": ".jsonld",
+    "csv": ".csv",
+    "tsv": ".tsv",
+}
+
+
+def get_converted_filename(original_filename: str, convert_format: str) -> str:
+    """Generate output filename after format conversion.
+
+    Strips a compression extension if present, then replaces the format
+    extension (any key of EXTENSION_TO_FORMAT, so aliases such as .owl,
+    .xml and .json are replaced too) with the target format extension.
+
+    Args:
+        original_filename: Original file name (basename only, not full path).
+        convert_format: Target format name.
+
+    Returns:
+        New filename with updated extension.
+    """
+    name = original_filename
+
+    # strip compression extension
+    for ext in (".bz2", ".gz", ".xz"):
+        if name.lower().endswith(ext):
+            name = name[: -len(ext)]
+            break
+
+    # strip whichever known format extension is present, longest first
+    for old_ext in sorted(EXTENSION_TO_FORMAT, key=len, reverse=True):
+        if name.lower().endswith(old_ext):
+            name = name[: -len(old_ext)]
+            break
+
+    return name + FORMAT_TO_EXTENSION.get(convert_format, f".{convert_format}")
+
+
+# ---------------------------------------------------------------------------
+# Layer 2 — within-class format conversion
+# ---------------------------------------------------------------------------
+
+def convert_rdf_triple_format(
+    input_file: str,
+    output_file: str,
+    input_format: str,
+    output_format: str,
+) -> None:
+    """Re-serialize an RDF triples file in another triple format (Layer 2).
+
+    Supported formats: ntriples, turtle, rdf-xml.
+    The file is parsed into an rdflib Graph and serialized back out.
+
+    Args:
+        input_file: Path to input file.
+        output_file: Path to write converted output.
+        input_format: Source format name (must be in RDF_TRIPLE_FORMATS).
+        output_format: Target format name (must be in RDF_TRIPLE_FORMATS).
+    """
+    graph = Graph()
+    graph.parse(input_file, format=RDF_TRIPLE_FORMATS[input_format])
+    target_syntax = RDF_TRIPLE_FORMATS[output_format]
+    graph.serialize(destination=output_file, format=target_syntax)
+    written = os.path.basename(output_file)
+    print(f"Converted {input_format} -> {output_format}: {written}")
+
+
+def convert_rdf_quad_format(
+    input_file: str,
+    output_file: str,
+    input_format: str,
+    output_format: str,
+) -> None:
+    """Re-serialize an RDF quads file in another quad format (Layer 2).
+
+    Supported formats: nquads, trig, trix, json-ld.
+    The file is parsed into an rdflib Dataset (rather than a Graph)
+    to preserve named graph information, then serialized back out.
+
+    Args:
+        input_file: Path to input file.
+        output_file: Path to write converted output.
+        input_format: Source format name (must be in RDF_QUAD_FORMATS).
+        output_format: Target format name (must be in RDF_QUAD_FORMATS).
+    """
+    dataset = Dataset()
+    dataset.parse(input_file, format=RDF_QUAD_FORMATS[input_format])
+    target_syntax = RDF_QUAD_FORMATS[output_format]
+    dataset.serialize(destination=output_file, format=target_syntax)
+    written = os.path.basename(output_file)
+    print(f"Converted {input_format} -> {output_format}: {written}")
+
+
+def convert_tabular_format(
+    input_file: str,
+    output_file: str,
+    input_format: str,
+    output_format: str,
+) -> None:
+    """Rewrite a delimited text file with another delimiter (Layer 2).
+
+    Supported formats: csv and tsv, in either direction.
+    Parsing and writing are done with the standard-library csv module.
+
+    Args:
+        input_file: Path to input file.
+        output_file: Path to write converted output.
+        input_format: Source format name ('csv' or 'tsv').
+        output_format: Target format name ('csv' or 'tsv').
+    """
+    source_sep = TABULAR_FORMATS[input_format]
+    target_sep = TABULAR_FORMATS[output_format]
+
+    # the whole table is read into memory before the output is opened
+    with open(input_file, "r", newline="", encoding="utf-8") as source:
+        table = list(csv.reader(source, delimiter=source_sep))
+
+    with open(output_file, "w", newline="", encoding="utf-8") as target:
+        out = csv.writer(target, delimiter=target_sep)
+        for record in table:
+            out.writerow(record)
+
+    written = os.path.basename(output_file)
+    print(f"Converted {input_format} -> {output_format}: {written}")
+
+
+# ---------------------------------------------------------------------------
+# Layer 3 — cross-class mapping conversion
+# ---------------------------------------------------------------------------
+
+def convert_rdf_to_csv(
+    input_file: str,
+    output_file: str,
+    input_format: str,
+    delimiter: Optional[str] = None,
+) -> None:
+    """Map RDF triples to a wide CSV/TSV table (Layer 3).
+
+    Each unique subject becomes a row, each unique predicate a column, and
+    multi-valued predicates are pipe-separated. A companion .meta.json file
+    is written next to the table; it keeps one datatype or language tag per
+    predicate (the last one seen), so round trips are only approximate.
+
+    Args:
+        input_file: Path to input RDF triples file.
+        output_file: Path to write the output table.
+        input_format: Source triple format name (must be in RDF_TRIPLE_FORMATS).
+        delimiter: Field separator. Defaults to tab when output_file has a
+            .tsv extension and to comma otherwise.
+    """
+    # the dispatcher routes both csv and tsv targets here, so the separator
+    # has to follow the target file instead of always being a comma
+    if delimiter is None:
+        target_format = detect_format_from_filename(output_file)
+        delimiter = TABULAR_FORMATS.get(target_format, ",")
+    label = "TSV" if delimiter == "\t" else "CSV"
+
+    g = Graph()
+    g.parse(input_file, format=RDF_TRIPLE_FORMATS[input_format])
+
+    subjects: dict = {}
+    column_metadata: dict = {}
+    for s, p, o in g:
+        pred = str(p)
+        # capture datatype or language tag for companion file
+        if getattr(o, "datatype", None):
+            column_metadata[pred] = {"datatype": str(o.datatype)}
+        elif getattr(o, "language", None):
+            column_metadata[pred] = {"language": str(o.language)}
+
+        subjects.setdefault(str(s), {}).setdefault(pred, []).append(str(o))
+
+    predicates = sorted({pred for row in subjects.values() for pred in row})
+
+    with open(output_file, "w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f, delimiter=delimiter)
+        writer.writerow(["resource"] + predicates)
+        for subj, pred_map in subjects.items():
+            cells = ["|".join(pred_map.get(pred, [])) for pred in predicates]
+            writer.writerow([subj] + cells)
+
+    companion_file = output_file + ".meta.json"
+    with open(companion_file, "w", encoding="utf-8") as f:
+        json.dump({"columns": column_metadata}, f, indent=2)
+
+    print(f"Converted RDF -> {label}: {os.path.basename(output_file)}")
+    print(f"Companion metadata: {os.path.basename(companion_file)}")
+
+
+# ---------------------------------------------------------------------------
+# Main dispatcher — called from download pipeline
+# ---------------------------------------------------------------------------
+
+def convert_file(
+    input_file: str,
+    output_file: str,
+    convert_format: str,
+) -> None:
+    """Convert a downloaded file to the requested target format.
+
+    The input format is taken from the file extension. The format classes
+    of input and target decide whether this is a Layer 2 (within-class) or
+    Layer 3 (cross-class) conversion. Equal input and target formats only
+    print a warning; nothing is written.
+
+    The only Layer 3 conversion implemented here is RDF triples -> tabular.
+
+    Args:
+        input_file: Path to the input file (must be decompressed).
+        output_file: Path to write the converted output file.
+        convert_format: Target format name (CLI format string).
+
+    Raises:
+        ValueError: If the input format cannot be detected or if the
+                    requested conversion is not supported.
+    """
+    input_format = detect_format_from_filename(input_file)
+    if input_format is None:
+        supported = list(EXTENSION_TO_FORMAT.keys())
+        raise ValueError(
+            f"Could not detect input format from filename: '{os.path.basename(input_file)}'. "
+            f"Supported extensions: {supported}"
+        )
+
+    # identical formats: nothing to do
+    if input_format == convert_format:
+        print(
+            f"WARNING: Input and target format are both '{input_format}'. "
+            "Skipping conversion."
+        )
+        return
+
+    # get_format_class raises ValueError for an unrecognised format name
+    input_class = get_format_class(input_format)
+    output_class = get_format_class(convert_format)
+
+    # --- Layer 2: within-class ---
+    # every within-class converter takes the same four arguments
+    if input_class == output_class:
+        layer2_converters = {
+            "triples": convert_rdf_triple_format,
+            "quads": convert_rdf_quad_format,
+            "tabular": convert_tabular_format,
+        }
+        converter = layer2_converters[input_class]
+        converter(
+            input_file, output_file, input_format, convert_format
+        )
+        return
+
+    # --- Layer 3: cross-class ---
+    if (input_class, output_class) != ("triples", "tabular"):
+        raise ValueError(
+            f"Conversion from '{input_format}' ({input_class}) to "
+            f"'{convert_format}' ({output_class}) is not yet implemented. "
+            f"Supported Layer 3 conversions: RDF Triples -> CSV/TSV."
+        )
+
+    # only RDF triples -> CSV/TSV is implemented
+    convert_rdf_to_csv(input_file, output_file, input_format)
\ No newline at end of file
diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index 312af45..74bd2fd 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -16,6 +16,7 @@
     get_databus_id_parts_from_file_url,
     compute_sha256_and_length,
 )
+from databusclient.api.convert import convert_file, get_converted_filename
 
 # Compression format mappings
 COMPRESSION_EXTENSIONS = {
@@ -313,6 +314,7 @@ def _download_file(
     client_id=None,
     convert_to=None,
     convert_from=None,
+    convert_format=None,
     validate_checksum: bool = False,
     expected_checksum: str | None = None,
 ) -> None:
@@ -327,6 +329,7 @@ def _download_file(
         client_id: Client ID for token exchange.
         convert_to: Target compression format for on-the-fly conversion.
         convert_from: Optional source compression format filter.
+        convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
         expected_checksum: The expected checksum of the file.
     """
@@ -507,12 +510,63 @@ def _download_file(
 
     # --- 7. Convert compression format if requested (AFTER validation) ---
     should_convert, source_format = _should_convert_file(file, convert_to, convert_from)
+    final_downloaded_file = filename
     if should_convert and source_format:
         target_filename = _get_converted_filename(file, source_format, convert_to)
         target_filepath = os.path.join(localDir, target_filename)
         _convert_compression_format(
             filename, target_filepath, source_format, convert_to
         )
+        final_downloaded_file = target_filepath
+
+    # --- 8. Convert file format if requested (AFTER compression conversion) ---
+    if convert_format:
+        final_basename = os.path.basename(final_downloaded_file)
+        compression_fmt = _detect_compression_format(final_basename)
+
+        if compression_fmt:
+            # File is still compressed — decompress to a temp file first,
+            # then convert format, then clean up the temp file.
+            # This follows the pipeline: Download -> Decompress -> Convert -> Save
+            import tempfile
+
+            source_module = COMPRESSION_MODULES[compression_fmt]
+            # temp decompressed file sits next to the original
+            compression_ext = COMPRESSION_EXTENSIONS[compression_fmt]
+            if final_downloaded_file.lower().endswith(compression_ext):
+                temp_decompressed = final_downloaded_file[:-len(compression_ext)]
+            else:
+                temp_decompressed = final_downloaded_file + ".decompressed"
+
+            try:
+                print(
+                    f"Decompressing {final_basename} before format conversion..."
+                )
+                with source_module.open(final_downloaded_file, "rb") as sf:
+                    with open(temp_decompressed, "wb") as tf:
+                        while True:
+                            chunk = sf.read(8192)
+                            if not chunk:
+                                break
+                            tf.write(chunk)
+
+                # now convert the decompressed temp file
+                converted_filename = get_converted_filename(
+                    final_basename, convert_format
+                )
+                converted_filepath = os.path.join(localDir, converted_filename)
+                convert_file(temp_decompressed, converted_filepath, convert_format)
+
+            finally:
+                # always clean up temp file even if conversion fails
+                if os.path.exists(temp_decompressed):
+                    os.remove(temp_decompressed)
+
+        else:
+            # file is already uncompressed — convert directly
+            converted_filename = get_converted_filename(final_basename, convert_format)
+            converted_filepath = os.path.join(localDir, converted_filename)
+            convert_file(final_downloaded_file, converted_filepath, convert_format)
 
 
 def _download_files(
@@ -524,6 +578,7 @@ def _download_files(
     client_id: str = None,
     convert_to: str = None,
     convert_from: str = None,
+    convert_format: str = None,
     validate_checksum: bool = False,
     checksums: dict | None = None,
 ) -> None:
@@ -538,6 +593,7 @@ def _download_files(
         client_id: Client ID for token exchange.
         convert_to: Target compression format for on-the-fly conversion.
         convert_from: Optional source compression format filter.
+        convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
         checksums: Dictionary mapping URLs to their expected checksums.
     """
@@ -554,6 +610,7 @@ def _download_files(
             client_id=client_id,
             convert_to=convert_to,
             convert_from=convert_from,
+            convert_format=convert_format,
             validate_checksum=validate_checksum,
             expected_checksum=expected,
         )
@@ -702,6 +759,7 @@ def _download_collection(
     client_id: str = None,
     convert_to: str = None,
     convert_from: str = None,
+    convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
     """Download all files in a databus collection.
@@ -716,6 +774,7 @@ def _download_collection(
         client_id: Client ID for token exchange.
         convert_to: Target compression format for on-the-fly conversion.
         convert_from: Optional source compression format filter.
+        convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
     query = _get_sparql_query_of_collection(uri, databus_key=databus_key)
@@ -737,6 +796,7 @@ def _download_collection(
         client_id=client_id,
         convert_to=convert_to,
         convert_from=convert_from,
+        convert_format=convert_format,
         validate_checksum=validate_checksum,
         checksums=checksums if checksums else None,
     )
@@ -751,6 +811,7 @@ def _download_version(
     client_id: str = None,
     convert_to: str = None,
     convert_from: str = None,
+    convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
     """Download all files in a databus artifact version.
@@ -764,6 +825,7 @@ def _download_version(
         client_id: Client ID for token exchange.
         convert_to: Target compression format for on-the-fly conversion.
         convert_from: Optional source compression format filter.
+        convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
     json_str = fetch_databus_jsonld(uri, databus_key=databus_key)
@@ -784,6 +846,7 @@ def _download_version(
         client_id=client_id,
         convert_to=convert_to,
         convert_from=convert_from,
+        convert_format=convert_format,
         validate_checksum=validate_checksum,
         checksums=checksums,
     )
@@ -799,6 +862,7 @@ def _download_artifact(
     client_id: str = None,
     convert_to: str = None,
     convert_from: str = None,
+    convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
     """Download files in a databus artifact.
@@ -813,6 +877,7 @@ def _download_artifact(
         client_id: Client ID for token exchange.
         convert_to: Target compression format for on-the-fly conversion.
         convert_from: Optional source compression format filter.
+        convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
     json_str = fetch_databus_jsonld(uri, databus_key=databus_key)
@@ -839,6 +904,7 @@ def _download_artifact(
             client_id=client_id,
             convert_to=convert_to,
             convert_from=convert_from,
+            convert_format=convert_format,
             validate_checksum=validate_checksum,
             checksums=checksums,
         )
@@ -915,6 +981,7 @@ def _download_group(
     client_id: str = None,
     convert_to: str = None,
     convert_from: str = None,
+    convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
     """Download files in a databus group.
@@ -929,6 +996,7 @@ def _download_group(
         client_id: Client ID for token exchange.
         convert_to: Target compression format for on-the-fly conversion.
         convert_from: Optional source compression format filter.
+        convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
     json_str = fetch_databus_jsonld(uri, databus_key=databus_key)
@@ -945,6 +1013,7 @@ def _download_group(
             client_id=client_id,
             convert_to=convert_to,
             convert_from=convert_from,
+            convert_format=convert_format,
             validate_checksum=validate_checksum,
         )
 
@@ -994,6 +1063,7 @@ def download(
     client_id="vault-token-exchange",
     convert_to=None,
     convert_from=None,
+    convert_format=None,
     validate_checksum: bool = False,
 ) -> None:
     """Download datasets from databus.
@@ -1010,6 +1080,7 @@ def download(
         client_id: Client ID for token exchange. Default is "vault-token-exchange".
         convert_to: Target compression format for on-the-fly conversion (supported: bz2, gz, xz).
         convert_from: Optional source compression format filter.
+        convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
     for databusURI in databusURIs:
@@ -1039,6 +1110,7 @@ def download(
                     client_id,
                     convert_to,
                     convert_from,
+                    convert_format,
                     validate_checksum=validate_checksum,
                 )
             elif file is not None:
@@ -1060,6 +1132,7 @@ def download(
                     client_id=client_id,
                     convert_to=convert_to,
                     convert_from=convert_from,
+                    convert_format=convert_format,
                     validate_checksum=validate_checksum,
                     expected_checksum=expected,
                 )
@@ -1074,6 +1147,7 @@ def download(
                     client_id=client_id,
                     convert_to=convert_to,
                     convert_from=convert_from,
+                    convert_format=convert_format,
                     validate_checksum=validate_checksum,
                 )
             elif artifact is not None:
@@ -1090,6 +1164,7 @@ def download(
                     client_id=client_id,
                     convert_to=convert_to,
                     convert_from=convert_from,
+                    convert_format=convert_format,
                     validate_checksum=validate_checksum,
                 )
             elif group is not None and group != "collections":
@@ -1106,6 +1181,7 @@ def download(
                     client_id=client_id,
                     convert_to=convert_to,
                     convert_from=convert_from,
+                    convert_format=convert_format,
                     validate_checksum=validate_checksum,
                 )
             elif account is not None:
@@ -1144,6 +1220,7 @@ def download(
                 client_id=client_id,
                 convert_to=convert_to,
                 convert_from=convert_from,
+                convert_format=convert_format,
                 validate_checksum=validate_checksum,
                 checksums=checksums if checksums else None,
             )
diff --git a/databusclient/cli.py b/databusclient/cli.py
index c3bd8f2..c687616 100644
--- a/databusclient/cli.py
+++ b/databusclient/cli.py
@@ -189,6 +189,15 @@ def deploy(
     type=click.Choice(["bz2", "gz", "xz"], case_sensitive=False),
     help="Source compression format to convert from (optional filter). Only files with this compression will be converted.",
 )
+@click.option(
+    "--convert-format",
+    "convert_format",
+    type=click.Choice(
+        ["ntriples","turtle","rdf-xml","nquads","trig","trix","json-ld","csv","tsv"],
+        case_sensitive=False,
+    ),
+    help="Target format for on-the-fly format conversion during download (Layer 2 and Layer 3).",
+)
 @click.option(
     "--validate-checksum", is_flag=True, help="Validate checksums of downloaded files"
 )
@@ -203,6 +212,7 @@ def download(
     clientid,
     convert_to,
     convert_from,
+    convert_format,
     validate_checksum,
 ):
     """
@@ -221,6 +231,7 @@ def download(
             client_id=clientid,
             convert_to=convert_to,
             convert_from=convert_from,
+            convert_format=convert_format,
             validate_checksum=validate_checksum,
         )
     except DownloadAuthError as e:
diff --git a/run_all_conversion_tests.py b/run_all_conversion_tests.py
new file mode 100644
index 0000000..384e052
--- /dev/null
+++ b/run_all_conversion_tests.py
@@ -0,0 +1,338 @@
+"""
+Layer 2 Conversion Testing Script
+Tests every conversion combination systematically.
+Outputs go to the test_outputs/ folder.
+Inputs are real Databus sample files that must already exist in test_outputs/base/.
+"""
+
+import os
+from databusclient.api.convert import (
+    convert_rdf_triple_format,
+    convert_rdf_quad_format,
+    convert_tabular_format,
+)
+
+# ---------------------------------------------------------------------------
+# Setup output folders
+# ---------------------------------------------------------------------------
+
+folders = [
+    "test_outputs/triples/T1_turtle_to_ntriples",
+    "test_outputs/triples/T2_turtle_to_rdfxml",
+    "test_outputs/triples/T3_ntriples_to_turtle",
+    "test_outputs/triples/T4_ntriples_to_rdfxml",
+    "test_outputs/triples/T5_rdfxml_to_turtle",
+    "test_outputs/triples/T6_rdfxml_to_ntriples",
+    "test_outputs/quads/Q1_nquads_to_trig",
+    "test_outputs/quads/Q2_nquads_to_trix",
+    "test_outputs/quads/Q3_nquads_to_jsonld",
+    "test_outputs/quads/Q4_trig_to_nquads",
+    "test_outputs/quads/Q5_trig_to_trix",
+    "test_outputs/quads/Q6_trig_to_jsonld",
+    "test_outputs/quads/Q7_trix_to_nquads",
+    "test_outputs/quads/Q8_trix_to_trig",
+    "test_outputs/quads/Q9_trix_to_jsonld",
+    "test_outputs/quads/Q10_jsonld_to_nquads",
+    "test_outputs/quads/Q11_jsonld_to_trig",
+    "test_outputs/quads/Q12_jsonld_to_trix",
+    "test_outputs/tabular/TAB1_csv_to_tsv",
+    "test_outputs/tabular/TAB2_tsv_to_csv",
+]
+
+for folder in folders:
+    os.makedirs(folder, exist_ok=True)
+
+results = []
+
+
+def run_test(test_id, description, func, input_file, output_file, *args):
+    """Run one conversion test and record the result."""
+    try:
+        func(input_file, output_file, *args)
+        size = os.path.getsize(output_file)
+        results.append(f"PASS  {test_id}: {description} -> {os.path.basename(output_file)} ({size} bytes)")
+        return output_file
+    except Exception as e:
+        results.append(f"FAIL  {test_id}: {description} -> ERROR: {e}")
+        return None
+
+
+# ---------------------------------------------------------------------------
+# GROUP 1: RDF Triple Format Conversions
+# 6 combinations: each format -> every other format
+# Base file: test_outputs/base/base.ttl (real DBpedia Turtle data)
+# Inputs: T1/T2 read the base file; T3/T4 reuse T1's output; T5/T6 reuse T2's output
+# ---------------------------------------------------------------------------
+
+print("\n=== GROUP 1: RDF TRIPLE FORMAT CONVERSIONS ===\n")
+
+BASE_TTL = "test_outputs/base/base.ttl"
+
+# T1: turtle -> ntriples (from base turtle file)
+t1_out = "test_outputs/triples/T1_turtle_to_ntriples/output.nt"
+run_test(
+    "T1", "turtle -> ntriples",
+    convert_rdf_triple_format,
+    BASE_TTL, t1_out, "turtle", "ntriples"
+)
+
+# T2: turtle -> rdf-xml (from base turtle file)
+t2_out = "test_outputs/triples/T2_turtle_to_rdfxml/output.rdf"
+run_test(
+    "T2", "turtle -> rdf-xml",
+    convert_rdf_triple_format,
+    BASE_TTL, t2_out, "turtle", "rdf-xml"
+)
+
+# T3: ntriples -> turtle (uses T1 output)
+t3_out = "test_outputs/triples/T3_ntriples_to_turtle/output.ttl"
+if t1_out and os.path.exists(t1_out):
+    run_test(
+        "T3", "ntriples -> turtle",
+        convert_rdf_triple_format,
+        t1_out, t3_out, "ntriples", "turtle"
+    )
+else:
+    results.append("SKIP  T3: ntriples -> turtle (T1 output not available)")
+
+# T4: ntriples -> rdf-xml (uses T1 output)
+t4_out = "test_outputs/triples/T4_ntriples_to_rdfxml/output.rdf"
+if t1_out and os.path.exists(t1_out):
+    run_test(
+        "T4", "ntriples -> rdf-xml",
+        convert_rdf_triple_format,
+        t1_out, t4_out, "ntriples", "rdf-xml"
+    )
+else:
+    results.append("SKIP  T4: ntriples -> rdf-xml (T1 output not available)")
+
+# T5: rdf-xml -> turtle (uses T2 output)
+t5_out = "test_outputs/triples/T5_rdfxml_to_turtle/output.ttl"
+if t2_out and os.path.exists(t2_out):
+    run_test(
+        "T5", "rdf-xml -> turtle",
+        convert_rdf_triple_format,
+        t2_out, t5_out, "rdf-xml", "turtle"
+    )
+else:
+    results.append("SKIP  T5: rdf-xml -> turtle (T2 output not available)")
+
+# T6: rdf-xml -> ntriples (uses T2 output)
+t6_out = "test_outputs/triples/T6_rdfxml_to_ntriples/output.nt"
+if t2_out and os.path.exists(t2_out):
+    run_test(
+        "T6", "rdf-xml -> ntriples",
+        convert_rdf_triple_format,
+        t2_out, t6_out, "rdf-xml", "ntriples"
+    )
+else:
+    results.append("SKIP  T6: rdf-xml -> ntriples (T2 output not available)")
+
+
+# ---------------------------------------------------------------------------
+# GROUP 2: RDF Quad Format Conversions
+# 12 combinations: each of 4 formats -> every other format (4*3=12)
+# Base file: test_outputs/base/base.nq
+# Inputs: Q1-Q3 read the base file; Q4-Q6 reuse Q1's output; Q7-Q9 reuse Q2's; Q10-Q12 reuse Q3's
+# ---------------------------------------------------------------------------
+
+print("\n=== GROUP 2: RDF QUAD FORMAT CONVERSIONS ===\n")
+
+BASE_NQ = "test_outputs/base/base.nq"
+
+# Q1: nquads -> trig
+q1_out = "test_outputs/quads/Q1_nquads_to_trig/output.trig"
+run_test(
+    "Q1", "nquads -> trig",
+    convert_rdf_quad_format,
+    BASE_NQ, q1_out, "nquads", "trig"
+)
+
+# Q2: nquads -> trix
+q2_out = "test_outputs/quads/Q2_nquads_to_trix/output.trix"
+run_test(
+    "Q2", "nquads -> trix",
+    convert_rdf_quad_format,
+    BASE_NQ, q2_out, "nquads", "trix"
+)
+
+# Q3: nquads -> json-ld
+q3_out = "test_outputs/quads/Q3_nquads_to_jsonld/output.jsonld"
+run_test(
+    "Q3", "nquads -> json-ld",
+    convert_rdf_quad_format,
+    BASE_NQ, q3_out, "nquads", "json-ld"
+)
+
+# Q4: trig -> nquads (uses Q1 output)
+q4_out = "test_outputs/quads/Q4_trig_to_nquads/output.nq"
+if q1_out and os.path.exists(q1_out):
+    run_test(
+        "Q4", "trig -> nquads",
+        convert_rdf_quad_format,
+        q1_out, q4_out, "trig", "nquads"
+    )
+else:
+    results.append("SKIP  Q4: trig -> nquads (Q1 output not available)")
+
+# Q5: trig -> trix (uses Q1 output)
+q5_out = "test_outputs/quads/Q5_trig_to_trix/output.trix"
+if q1_out and os.path.exists(q1_out):
+    run_test(
+        "Q5", "trig -> trix",
+        convert_rdf_quad_format,
+        q1_out, q5_out, "trig", "trix"
+    )
+else:
+    results.append("SKIP  Q5: trig -> trix (Q1 output not available)")
+
+# Q6: trig -> json-ld (uses Q1 output)
+q6_out = "test_outputs/quads/Q6_trig_to_jsonld/output.jsonld"
+if q1_out and os.path.exists(q1_out):
+    run_test(
+        "Q6", "trig -> json-ld",
+        convert_rdf_quad_format,
+        q1_out, q6_out, "trig", "json-ld"
+    )
+else:
+    results.append("SKIP  Q6: trig -> json-ld (Q1 output not available)")
+
+# Q7: trix -> nquads (uses Q2 output)
+q7_out = "test_outputs/quads/Q7_trix_to_nquads/output.nq"
+if q2_out and os.path.exists(q2_out):
+    run_test(
+        "Q7", "trix -> nquads",
+        convert_rdf_quad_format,
+        q2_out, q7_out, "trix", "nquads"
+    )
+else:
+    results.append("SKIP  Q7: trix -> nquads (Q2 output not available)")
+
+# Q8: trix -> trig (uses Q2 output)
+q8_out = "test_outputs/quads/Q8_trix_to_trig/output.trig"
+if q2_out and os.path.exists(q2_out):
+    run_test(
+        "Q8", "trix -> trig",
+        convert_rdf_quad_format,
+        q2_out, q8_out, "trix", "trig"
+    )
+else:
+    results.append("SKIP  Q8: trix -> trig (Q2 output not available)")
+
+# Q9: trix -> json-ld (uses Q2 output)
+q9_out = "test_outputs/quads/Q9_trix_to_jsonld/output.jsonld"
+if q2_out and os.path.exists(q2_out):
+    run_test(
+        "Q9", "trix -> json-ld",
+        convert_rdf_quad_format,
+        q2_out, q9_out, "trix", "json-ld"
+    )
+else:
+    results.append("SKIP  Q9: trix -> json-ld (Q2 output not available)")
+
+# Q10: json-ld -> nquads (uses Q3 output)
+q10_out = "test_outputs/quads/Q10_jsonld_to_nquads/output.nq"
+if q3_out and os.path.exists(q3_out):
+    run_test(
+        "Q10", "json-ld -> nquads",
+        convert_rdf_quad_format,
+        q3_out, q10_out, "json-ld", "nquads"
+    )
+else:
+    results.append("SKIP  Q10: json-ld -> nquads (Q3 output not available)")
+
+# Q11: json-ld -> trig (uses Q3 output)
+q11_out = "test_outputs/quads/Q11_jsonld_to_trig/output.trig"
+if q3_out and os.path.exists(q3_out):
+    run_test(
+        "Q11", "json-ld -> trig",
+        convert_rdf_quad_format,
+        q3_out, q11_out, "json-ld", "trig"
+    )
+else:
+    results.append("SKIP  Q11: json-ld -> trig (Q3 output not available)")
+
+# Q12: json-ld -> trix (uses Q3 output)
+q12_out = "test_outputs/quads/Q12_jsonld_to_trix/output.trix"
+if q3_out and os.path.exists(q3_out):
+    run_test(
+        "Q12", "json-ld -> trix",
+        convert_rdf_quad_format,
+        q3_out, q12_out, "json-ld", "trix"
+    )
+else:
+    results.append("SKIP  Q12: json-ld -> trix (Q3 output not available)")
+
+
+# ---------------------------------------------------------------------------
+# GROUP 3: Tabular Format Conversions
+# 2 combinations: csv->tsv and tsv->csv
+# ---------------------------------------------------------------------------
+
+print("\n=== GROUP 3: TABULAR FORMAT CONVERSIONS ===\n")
+
+BASE_CSV = "test_outputs/base/base.csv"
+BASE_TSV = "test_outputs/base/base.tsv"
+
+# TAB1: csv -> tsv
+tab1_out = "test_outputs/tabular/TAB1_csv_to_tsv/output.tsv"
+run_test(
+    "TAB1", "csv -> tsv",
+    convert_tabular_format,
+    BASE_CSV, tab1_out, "csv", "tsv"
+)
+
+# TAB2: tsv -> csv (uses TAB1 output)
+tab2_out = "test_outputs/tabular/TAB2_tsv_to_csv/output.csv"
+if tab1_out and os.path.exists(tab1_out):
+    run_test(
+        "TAB2", "tsv -> csv",
+        convert_tabular_format,
+        tab1_out, tab2_out, "tsv", "csv"
+    )
+else:
+    results.append("SKIP  TAB2: tsv -> csv (TAB1 output not available)")
+
+
+# ---------------------------------------------------------------------------
+# GROUP 4: CLI End-to-End Tests (compressed real Databus file)
+# Only printed here, not executed: run them manually to test the full pipeline including download.py wiring
+# ---------------------------------------------------------------------------
+
+print("\n=== GROUP 4: CLI END-TO-END (run these manually) ===\n")
+cli_tests = [
+    "CLI1: turtle->ntriples from compressed Databus file",
+    "  poetry run databusclient download \"https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=cy.ttl.bz2\" --convert-format ntriples --localdir ./test_outputs/cli/CLI1",
+    "",
+    "CLI2: turtle->rdf-xml from compressed Databus file",
+    "  poetry run databusclient download \"https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=cy.ttl.bz2\" --convert-format rdf-xml --localdir ./test_outputs/cli/CLI2",
+    "",
+    "CLI3: turtle->ntriples + compression bz2->gz",
+    "  poetry run databusclient download \"https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=cy.ttl.bz2\" --convert-format ntriples --convert-to gz --localdir ./test_outputs/cli/CLI3",
+    "",
+    "CLI4: turtle->ntriples + compression bz2->xz",
+    "  poetry run databusclient download \"https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=cy.ttl.bz2\" --convert-format ntriples --convert-to xz --localdir ./test_outputs/cli/CLI4",
+    "",
+    "CLI5: unsupported cross-class error (expect ValueError)",
+    "  poetry run databusclient download \"https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=cy.ttl.bz2\" --convert-format nquads --localdir ./test_outputs/cli/CLI5",
+]
+for line in cli_tests:
+    print(line)
+
+
+# ---------------------------------------------------------------------------
+# Print summary
+# ---------------------------------------------------------------------------
+
+print("\n" + "="*60)
+print("LAYER 2 CONVERSION TEST SUMMARY")
+print("="*60)
+for result in results:
+    print(result)
+
+passed = sum(1 for r in results if r.startswith("PASS"))
+failed = sum(1 for r in results if r.startswith("FAIL"))
+skipped = sum(1 for r in results if r.startswith("SKIP"))
+
+print(f"\nTotal: {passed} passed, {failed} failed, {skipped} skipped")
+print("="*60)
\ No newline at end of file
diff --git a/tests/test_conversion.py b/tests/test_conversion.py
new file mode 100644
index 0000000..7ce710c
--- /dev/null
+++ b/tests/test_conversion.py
@@ -0,0 +1,309 @@
+"""Round trip tests for Layer 2 format conversion.
+
+Following the strategy from Frey et al., each test validates that
+reading a format and writing it back produces semantically identical output.
+Pattern: parse(format X) -> serialize(format X) -> parse again -> compare.
+
+9 tests total:
+- Triple formats: ntriples, turtle, rdf-xml          (3 tests)
+- Quad formats:   nquads, trig, trix, json-ld        (4 tests)
+- Tabular formats: csv, tsv                           (2 tests)
+"""
+
+import csv
+import os
+import tempfile
+
+from rdflib import Dataset, Graph
+
+from databusclient.api.convert import (
+    convert_rdf_quad_format,
+    convert_rdf_triple_format,
+    convert_tabular_format,
+)
+
+# ---------------------------------------------------------------------------
+# Sample RDF data used across all RDF tests
+# ---------------------------------------------------------------------------
+
+SAMPLE_TURTLE = """
+@prefix ex: <http://example.org/> .
+@prefix schema: <http://schema.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+ex:Paris schema:isCapitalOf ex:France ;
+         schema:population "2161000"^^xsd:integer .
+
+ex:Berlin schema:isCapitalOf ex:Germany ;
+          schema:population "3645000"^^xsd:integer .
+"""
+
+SAMPLE_NQUADS = """
+<http://example.org/Paris> <http://schema.org/isCapitalOf> <http://example.org/France> <http://example.org/graph1> .
+<http://example.org/Berlin> <http://schema.org/isCapitalOf> <http://example.org/Germany> <http://example.org/graph1> .
+<http://example.org/Rome> <http://schema.org/isCapitalOf> <http://example.org/Italy> <http://example.org/graph2> .
+"""
+
+SAMPLE_CSV = """resource,name,population
+http://example.org/Paris,Paris,2161000
+http://example.org/Berlin,Berlin,3645000
+"""
+
+SAMPLE_TSV = "resource\tname\tpopulation\nhttp://example.org/Paris\tParis\t2161000\nhttp://example.org/Berlin\tBerlin\t3645000\n"
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _write_temp(content: str, suffix: str) -> str:
+    """Write content to a named temp file and return its path."""
+    fd, path = tempfile.mkstemp(suffix=suffix)
+    with os.fdopen(fd, "w", encoding="utf-8") as f:
+        f.write(content)
+    return path
+
+
+def _graphs_are_isomorphic(g1: Graph, g2: Graph) -> bool:
+    """Check semantic equivalence of two rdflib Graphs."""
+    return g1.isomorphic(g2)
+
+
+def _datasets_equal(g1: Dataset, g2: Dataset) -> bool:
+    """Check semantic equivalence of two Datasets by triple count and graph names."""
+    if len(g1) != len(g2):
+        return False
+    graphs1 = {str(c.identifier) for c in g1.graphs()}
+    graphs2 = {str(c.identifier) for c in g2.graphs()}
+    return graphs1 == graphs2
+
+
+# ---------------------------------------------------------------------------
+# Triple format round trip tests (Layer 2)
+# ---------------------------------------------------------------------------
+
+def test_round_trip_turtle():
+    """Turtle -> Turtle: parse, serialize, reparse, compare."""
+    input_path = _write_temp(SAMPLE_TURTLE, ".ttl")
+    output_path = input_path + ".rt.ttl"
+
+    try:
+        convert_rdf_triple_format(input_path, output_path, "turtle", "turtle")
+
+        g_original = Graph()
+        g_original.parse(input_path, format="turtle")
+
+        g_roundtrip = Graph()
+        g_roundtrip.parse(output_path, format="turtle")
+
+        assert _graphs_are_isomorphic(g_original, g_roundtrip), (
+            "Turtle round trip failed: graphs are not isomorphic"
+        )
+    finally:
+        for p in (input_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+def test_round_trip_ntriples():
+    """N-Triples -> N-Triples: parse, serialize, reparse, compare."""
+    # first produce an ntriples file from turtle
+    turtle_path = _write_temp(SAMPLE_TURTLE, ".ttl")
+    nt_path = turtle_path + ".nt"
+    output_path = nt_path + ".rt.nt"
+
+    try:
+        convert_rdf_triple_format(turtle_path, nt_path, "turtle", "ntriples")
+        convert_rdf_triple_format(nt_path, output_path, "ntriples", "ntriples")
+
+        g_original = Graph()
+        g_original.parse(nt_path, format="ntriples")
+
+        g_roundtrip = Graph()
+        g_roundtrip.parse(output_path, format="ntriples")
+
+        assert _graphs_are_isomorphic(g_original, g_roundtrip), (
+            "N-Triples round trip failed: graphs are not isomorphic"
+        )
+    finally:
+        for p in (turtle_path, nt_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+def test_round_trip_rdf_xml():
+    """RDF/XML -> RDF/XML: parse, serialize, reparse, compare."""
+    turtle_path = _write_temp(SAMPLE_TURTLE, ".ttl")
+    rdf_path = turtle_path + ".rdf"
+    output_path = rdf_path + ".rt.rdf"
+
+    try:
+        convert_rdf_triple_format(turtle_path, rdf_path, "turtle", "rdf-xml")
+        convert_rdf_triple_format(rdf_path, output_path, "rdf-xml", "rdf-xml")
+
+        g_original = Graph()
+        g_original.parse(rdf_path, format="xml")
+
+        g_roundtrip = Graph()
+        g_roundtrip.parse(output_path, format="xml")
+
+        assert _graphs_are_isomorphic(g_original, g_roundtrip), (
+            "RDF/XML round trip failed: graphs are not isomorphic"
+        )
+    finally:
+        for p in (turtle_path, rdf_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+# ---------------------------------------------------------------------------
+# Quad format round trip tests (Layer 2)
+# ---------------------------------------------------------------------------
+
+def test_round_trip_nquads():
+    """N-Quads -> N-Quads: parse, serialize, reparse, compare."""
+    input_path = _write_temp(SAMPLE_NQUADS, ".nq")
+    output_path = input_path + ".rt.nq"
+
+    try:
+        convert_rdf_quad_format(input_path, output_path, "nquads", "nquads")
+
+        g_original = Dataset()
+        g_original.parse(input_path, format="nquads")
+
+        g_roundtrip = Dataset()
+        g_roundtrip.parse(output_path, format="nquads")
+
+        assert _datasets_equal(g_original, g_roundtrip), (
+            "N-Quads round trip failed: graphs are not equal"
+        )
+    finally:
+        for p in (input_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+def test_round_trip_trig():
+    """TriG -> TriG: parse, serialize, reparse, compare."""
+    # produce trig from nquads
+    nq_path = _write_temp(SAMPLE_NQUADS, ".nq")
+    trig_path = nq_path + ".trig"
+    output_path = trig_path + ".rt.trig"
+
+    try:
+        convert_rdf_quad_format(nq_path, trig_path, "nquads", "trig")
+        convert_rdf_quad_format(trig_path, output_path, "trig", "trig")
+
+        g_original = Dataset()
+        g_original.parse(trig_path, format="trig")
+
+        g_roundtrip = Dataset()
+        g_roundtrip.parse(output_path, format="trig")
+
+        assert _datasets_equal(g_original, g_roundtrip), (
+            "TriG round trip failed: graphs are not equal"
+        )
+    finally:
+        for p in (nq_path, trig_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+def test_round_trip_trix():
+    """TriX -> TriX: parse, serialize, reparse, compare."""
+    nq_path = _write_temp(SAMPLE_NQUADS, ".nq")
+    trix_path = nq_path + ".trix"
+    output_path = trix_path + ".rt.trix"
+
+    try:
+        convert_rdf_quad_format(nq_path, trix_path, "nquads", "trix")
+        convert_rdf_quad_format(trix_path, output_path, "trix", "trix")
+
+        g_original = Dataset()
+        g_original.parse(trix_path, format="trix")
+
+        g_roundtrip = Dataset()
+        g_roundtrip.parse(output_path, format="trix")
+
+        assert _datasets_equal(g_original, g_roundtrip), (
+            "TriX round trip failed: graphs are not equal"
+        )
+    finally:
+        for p in (nq_path, trix_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+def test_round_trip_json_ld():
+    """JSON-LD -> JSON-LD: parse, serialize, reparse, compare."""
+    nq_path = _write_temp(SAMPLE_NQUADS, ".nq")
+    jsonld_path = nq_path + ".jsonld"
+    output_path = jsonld_path + ".rt.jsonld"
+
+    try:
+        convert_rdf_quad_format(nq_path, jsonld_path, "nquads", "json-ld")
+        convert_rdf_quad_format(jsonld_path, output_path, "json-ld", "json-ld")
+
+        g_original = Dataset()
+        g_original.parse(jsonld_path, format="json-ld")
+
+        g_roundtrip = Dataset()
+        g_roundtrip.parse(output_path, format="json-ld")
+
+        assert _datasets_equal(g_original, g_roundtrip), (
+            "JSON-LD round trip failed: graphs are not equal"
+        )
+    finally:
+        for p in (nq_path, jsonld_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+# ---------------------------------------------------------------------------
+# Tabular format round trip tests (Layer 2)
+# ---------------------------------------------------------------------------
+
+def test_round_trip_csv():
+    """CSV -> CSV: read, write, reread, compare rows."""
+    input_path = _write_temp(SAMPLE_CSV, ".csv")
+    output_path = input_path + ".rt.csv"
+
+    try:
+        convert_tabular_format(input_path, output_path, "csv", "csv")
+
+        with open(input_path, newline="", encoding="utf-8") as f:
+            original_rows = list(csv.reader(f))
+
+        with open(output_path, newline="", encoding="utf-8") as f:
+            roundtrip_rows = list(csv.reader(f))
+
+        assert original_rows == roundtrip_rows, (
+            "CSV round trip failed: rows do not match"
+        )
+    finally:
+        for p in (input_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
+
+
+def test_round_trip_tsv():
+    """TSV -> TSV: read, write, reread, compare rows."""
+    input_path = _write_temp(SAMPLE_TSV, ".tsv")
+    output_path = input_path + ".rt.tsv"
+
+    try:
+        convert_tabular_format(input_path, output_path, "tsv", "tsv")
+
+        with open(input_path, newline="", encoding="utf-8") as f:
+            original_rows = list(csv.reader(f, delimiter="\t"))
+
+        with open(output_path, newline="", encoding="utf-8") as f:
+            roundtrip_rows = list(csv.reader(f, delimiter="\t"))
+
+        assert original_rows == roundtrip_rows, (
+            "TSV round trip failed: rows do not match"
+        )
+    finally:
+        for p in (input_path, output_path):
+            if os.path.exists(p):
+                os.remove(p)
\ No newline at end of file

From 5d224f691897580e52bab5f82f26639a1ce69f6b Mon Sep 17 00:00:00 2001
From: DhanashreePetare <dhanashreepetare8125@gmail.com>
Date: Sat, 6 Jun 2026 19:53:50 +0530
Subject: [PATCH 2/7] gsoc26: Refactor Layer 2 with handler architecture and
 improved tests

---
 databusclient/api/convert.py     | 410 ++++++++++++++++++++++---------
 databusclient/api/download.py    |  46 +++-
 pyproject.toml                   |   6 +
 tests/resources/sample.csv       |  11 +
 tests/resources/sample.jsonld    |  62 +++++
 tests/resources/sample.nq        |  10 +
 tests/resources/sample.nt        |  10 +
 tests/resources/sample.rdf       |  30 +++
 tests/resources/sample.trig      |  22 ++
 tests/resources/sample.trix      |  72 ++++++
 tests/resources/sample.tsv       |  11 +
 tests/resources/sample.ttl       |  18 ++
 tests/test_conversion.py         | 309 -----------------------
 tests/test_format_round_trips.py | 256 +++++++++++++++++++
 14 files changed, 837 insertions(+), 436 deletions(-)
 create mode 100644 tests/resources/sample.csv
 create mode 100644 tests/resources/sample.jsonld
 create mode 100644 tests/resources/sample.nq
 create mode 100644 tests/resources/sample.nt
 create mode 100644 tests/resources/sample.rdf
 create mode 100644 tests/resources/sample.trig
 create mode 100644 tests/resources/sample.trix
 create mode 100644 tests/resources/sample.tsv
 create mode 100644 tests/resources/sample.ttl
 delete mode 100644 tests/test_conversion.py
 create mode 100644 tests/test_format_round_trips.py

diff --git a/databusclient/api/convert.py b/databusclient/api/convert.py
index 8d28fd8..8bd6dbb 100644
--- a/databusclient/api/convert.py
+++ b/databusclient/api/convert.py
@@ -1,14 +1,33 @@
 """Format and Mapping Conversion Layer.
 
+This module implements the format conversion pipeline for the Databus Python Client
+
 Layer 2: Within-class format conversion (lossless).
-Layer 3: Cross-class mapping conversion (quasi-equal for RDF <-> Tabular).
+    - TripleHandler: RDF triple formats (turtle, ntriples, rdf-xml)
+    - QuadHandler:   RDF quad formats (nquads, trig, trix, json-ld)
+    - TSDHandler:    Tabular formats (csv, tsv)
+
+Layer 3 (prototype, not yet fully implemented):
+    - RDF triples -> CSV/TSV (quasi-equal, companion metadata generated)
+
+Each handler provides read() -> IR, write(IR) -> file, convert() -> chains both.
+The IR (intermediate representation) returned by read() is designed to be passed
+to future mapping classes (TripleToQuadMapper, TripleToTSDMapper, etc.).
 """
 
 import csv
 import json
 import os
+import warnings
 from typing import Optional
 
+# Suppress DeprecationWarning and UserWarning raised from rdflib modules (Dataset API).
+# rdflib is mid-migration from ConjunctiveGraph to Dataset in 7.x.
+# These warnings originate from rdflib internals, not our code.
+# The filters can be removed once rdflib completes its Dataset API migration.
+warnings.filterwarnings("ignore", category=DeprecationWarning, module="rdflib")
+warnings.filterwarnings("ignore", category=UserWarning, module="rdflib")
+
 from rdflib import Dataset, Graph
 
 
@@ -57,9 +76,22 @@
     ".tsv": "tsv",
 }
 
+# Maps format name -> file extension
+FORMAT_TO_EXTENSION = {
+    "ntriples": ".nt",
+    "turtle": ".ttl",
+    "rdf-xml": ".rdf",
+    "nquads": ".nq",
+    "trig": ".trig",
+    "trix": ".trix",
+    "json-ld": ".jsonld",
+    "csv": ".csv",
+    "tsv": ".tsv",
+}
+
 
 # ---------------------------------------------------------------------------
-# Format detection
+# Format detection helpers
 # ---------------------------------------------------------------------------
 
 def detect_format_from_filename(filename: str) -> Optional[str]:
@@ -110,24 +142,6 @@ def get_format_class(fmt: str) -> str:
     )
 
 
-# ---------------------------------------------------------------------------
-# Output filename helper
-# ---------------------------------------------------------------------------
-
-# Maps format name -> file extension
-FORMAT_TO_EXTENSION = {
-    "ntriples": ".nt",
-    "turtle": ".ttl",
-    "rdf-xml": ".rdf",
-    "nquads": ".nq",
-    "trig": ".trig",
-    "trix": ".trix",
-    "json-ld": ".jsonld",
-    "csv": ".csv",
-    "tsv": ".tsv",
-}
-
-
 def get_converted_filename(original_filename: str, convert_format: str) -> str:
     """Generate output filename after format conversion.
 
@@ -149,7 +163,7 @@ def get_converted_filename(original_filename: str, convert_format: str) -> str:
             name = name[: -len(ext)]
             break
 
-    # strip existing format extension
+    # strip existing format extension (longest first)
     for old_ext in sorted(FORMAT_TO_EXTENSION.values(), key=len, reverse=True):
         if name.lower().endswith(old_ext):
             name = name[: -len(old_ext)]
@@ -160,95 +174,254 @@ def get_converted_filename(original_filename: str, convert_format: str) -> str:
 
 
 # ---------------------------------------------------------------------------
-# Layer 2 — within-class format conversion
+# Layer 2 Handlers
 # ---------------------------------------------------------------------------
 
-def convert_rdf_triple_format(
-    input_file: str,
-    output_file: str,
-    input_format: str,
-    output_format: str,
-) -> None:
-    """Convert between RDF triple serialization formats (Layer 2).
+class TripleHandler:
+    """Handler for RDF triple formats (Layer 2).
 
-    Handles: ntriples, turtle, rdf-xml.
-    Uses rdflib Graph as internal representation.
+    Uses rdflib.Graph as the intermediate representation (IR).
+    Supports: ntriples, turtle, rdf-xml.
 
-    Args:
-        input_file: Path to input file.
-        output_file: Path to write converted output.
-        input_format: Source format name (must be in RDF_TRIPLE_FORMATS).
-        output_format: Target format name (must be in RDF_TRIPLE_FORMATS).
+    The IR returned by read() can be passed to future mapping classes
+    such as TripleToQuadMapper or TripleToTSDMapper for Layer 3 conversions.
     """
-    g = Graph()
-    g.parse(input_file, format=RDF_TRIPLE_FORMATS[input_format])
-    g.serialize(destination=output_file, format=RDF_TRIPLE_FORMATS[output_format])
-    print(
-        f"Converted {input_format} -> {output_format}: {os.path.basename(output_file)}"
-    )
 
+    def read(self, source: str, input_format: str) -> Graph:
+        """Parse an RDF triples file into a Graph (IR).
 
-def convert_rdf_quad_format(
-    input_file: str,
-    output_file: str,
-    input_format: str,
-    output_format: str,
-) -> None:
-    """Convert between RDF quad serialization formats (Layer 2).
+        Args:
+            source: Path to input file.
+            input_format: Source format name (e.g. 'turtle', 'ntriples', 'rdf-xml').
 
-    Handles: nquads, trig, trix, json-ld.
-    Uses rdflib Dataset as internal representation
-    to preserve named graph information.
+        Returns:
+            rdflib.Graph containing all parsed triples.
 
-    Args:
-        input_file: Path to input file.
-        output_file: Path to write converted output.
-        input_format: Source format name (must be in RDF_QUAD_FORMATS).
-        output_format: Target format name (must be in RDF_QUAD_FORMATS).
+        Raises:
+            ValueError: If input_format is not a recognised triple format.
+        """
+        if input_format not in RDF_TRIPLE_FORMATS:
+            raise ValueError(
+                f"'{input_format}' is not a triple format. "
+                f"Supported: {list(RDF_TRIPLE_FORMATS)}"
+            )
+        g = Graph()
+        g.parse(source, format=RDF_TRIPLE_FORMATS[input_format])
+        return g
+
+    def write(self, data: Graph, target: str, output_format: str) -> None:
+        """Serialize a Graph (IR) to a file.
+
+        Args:
+            data: rdflib.Graph to serialize.
+            target: Path to output file.
+            output_format: Target format name (e.g. 'ntriples', 'turtle').
+
+        Raises:
+            ValueError: If output_format is not a recognised triple format.
+        """
+        if output_format not in RDF_TRIPLE_FORMATS:
+            raise ValueError(
+                f"'{output_format}' is not a triple format. "
+                f"Supported: {list(RDF_TRIPLE_FORMATS)}"
+            )
+        # Explicitly specify utf-8 encoding to avoid NTSerializer warning
+        data.serialize(
+            destination=target,
+            format=RDF_TRIPLE_FORMATS[output_format],
+            encoding="utf-8",
+        )
+
+    def convert(
+        self,
+        source: str,
+        target: str,
+        input_format: str,
+        output_format: str,
+    ) -> None:
+        """Convert between RDF triple formats (Layer 2, lossless).
+
+        Chains read() -> write(). Both formats must be in the same
+        equivalence class (RDF triples).
+
+        Args:
+            source: Path to input file.
+            target: Path to output file.
+            input_format: Source format name.
+            output_format: Target format name.
+        """
+        graph = self.read(source, input_format)
+        self.write(graph, target, output_format)
+        print(
+            f"Converted {input_format} -> {output_format}: "
+            f"{os.path.basename(target)}"
+        )
+
+
+class QuadHandler:
+    """Handler for RDF quad formats (Layer 2).
+
+    Uses rdflib.Dataset as the intermediate representation (IR).
+    Supports: nquads, trig, trix, json-ld.
+
+    Named graph information is preserved through the Dataset IR.
+    The IR returned by read() can be passed to future mapping classes
+    such as QuadToTripleMapper or QuadToTSDMapper for Layer 3 conversions.
     """
-    g = Dataset()
-    g.parse(input_file, format=RDF_QUAD_FORMATS[input_format])
-    g.serialize(destination=output_file, format=RDF_QUAD_FORMATS[output_format])
-    print(
-        f"Converted {input_format} -> {output_format}: {os.path.basename(output_file)}"
-    )
 
+    def read(self, source: str, input_format: str) -> Dataset:
+        """Parse an RDF quads file into a Dataset (IR).
 
-def convert_tabular_format(
-    input_file: str,
-    output_file: str,
-    input_format: str,
-    output_format: str,
-) -> None:
-    """Convert between tabular formats (Layer 2).
+        Args:
+            source: Path to input file.
+            input_format: Source format name (e.g. 'nquads', 'trig', 'trix', 'json-ld').
 
-    Handles: csv <-> tsv.
-    Uses Python built-in csv module.
+        Returns:
+            rdflib.Dataset containing all parsed quads with named graphs.
 
-    Args:
-        input_file: Path to input file.
-        output_file: Path to write converted output.
-        input_format: Source format name ('csv' or 'tsv').
-        output_format: Target format name ('csv' or 'tsv').
+        Raises:
+            ValueError: If input_format is not a recognised quad format.
+        """
+        if input_format not in RDF_QUAD_FORMATS:
+            raise ValueError(
+                f"'{input_format}' is not a quad format. "
+                f"Supported: {list(RDF_QUAD_FORMATS)}"
+            )
+        d = Dataset()
+        d.parse(source, format=RDF_QUAD_FORMATS[input_format])
+        return d
+
+    def write(self, data: Dataset, target: str, output_format: str) -> None:
+        """Serialize a Dataset (IR) to a file.
+
+        Args:
+            data: rdflib.Dataset to serialize.
+            target: Path to output file.
+            output_format: Target format name.
+
+        Raises:
+            ValueError: If output_format is not a recognised quad format.
+        """
+        if output_format not in RDF_QUAD_FORMATS:
+            raise ValueError(
+                f"'{output_format}' is not a quad format. "
+                f"Supported: {list(RDF_QUAD_FORMATS)}"
+            )
+        data.serialize(
+            destination=target,
+            format=RDF_QUAD_FORMATS[output_format],
+        )
+
+    def convert(
+        self,
+        source: str,
+        target: str,
+        input_format: str,
+        output_format: str,
+    ) -> None:
+        """Convert between RDF quad formats (Layer 2, lossless).
+
+        Chains read() -> write(). Both formats must be in the same
+        equivalence class (RDF quads). Named graph information is preserved.
+
+        Args:
+            source: Path to input file.
+            target: Path to output file.
+            input_format: Source format name.
+            output_format: Target format name.
+        """
+        dataset = self.read(source, input_format)
+        self.write(dataset, target, output_format)
+        print(
+            f"Converted {input_format} -> {output_format}: "
+            f"{os.path.basename(target)}"
+        )
+
+
+class TSDHandler:
+    """Handler for tabular structured data formats (Layer 2).
+
+    Uses list[list[str]] as the intermediate representation (IR).
+    Supports: csv, tsv.
+
+    The IR returned by read() can be passed to future mapping classes
+    such as TSDToTripleMapper for Layer 3 conversions.
     """
-    input_delimiter = TABULAR_FORMATS[input_format]
-    output_delimiter = TABULAR_FORMATS[output_format]
 
-    with open(input_file, "r", newline="", encoding="utf-8") as infile:
-        reader = csv.reader(infile, delimiter=input_delimiter)
-        rows = list(reader)
+    def read(self, source: str, input_format: str) -> list:
+        """Parse a tabular file into a list of rows (IR).
 
-    with open(output_file, "w", newline="", encoding="utf-8") as outfile:
-        writer = csv.writer(outfile, delimiter=output_delimiter)
-        writer.writerows(rows)
+        Each row is a list of string values. First row is the header.
 
-    print(
-        f"Converted {input_format} -> {output_format}: {os.path.basename(output_file)}"
-    )
+        Args:
+            source: Path to input file.
+            input_format: Source format name ('csv' or 'tsv').
+
+        Returns:
+            list[list[str]] where first element is the header row.
+
+        Raises:
+            ValueError: If input_format is not a recognised tabular format.
+        """
+        if input_format not in TABULAR_FORMATS:
+            raise ValueError(
+                f"'{input_format}' is not a tabular format. "
+                f"Supported: {list(TABULAR_FORMATS)}"
+            )
+        delimiter = TABULAR_FORMATS[input_format]
+        with open(source, "r", newline="", encoding="utf-8") as f:
+            reader = csv.reader(f, delimiter=delimiter)
+            return list(reader)
+
+    def write(self, data: list, target: str, output_format: str) -> None:
+        """Serialize a list of rows (IR) to a tabular file.
+
+        Args:
+            data: list[list[str]] to write.
+            target: Path to output file.
+            output_format: Target format name ('csv' or 'tsv').
+
+        Raises:
+            ValueError: If output_format is not a recognised tabular format.
+        """
+        if output_format not in TABULAR_FORMATS:
+            raise ValueError(
+                f"'{output_format}' is not a tabular format. "
+                f"Supported: {list(TABULAR_FORMATS)}"
+            )
+        delimiter = TABULAR_FORMATS[output_format]
+        with open(target, "w", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f, delimiter=delimiter)
+            writer.writerows(data)
+
+    def convert(
+        self,
+        source: str,
+        target: str,
+        input_format: str,
+        output_format: str,
+    ) -> None:
+        """Convert between tabular formats (Layer 2, lossless).
+
+        Chains read() -> write(). Both formats must be in the same
+        equivalence class (tabular).
+
+        Args:
+            source: Path to input file.
+            target: Path to output file.
+            input_format: Source format name.
+            output_format: Target format name.
+        """
+        rows = self.read(source, input_format)
+        self.write(rows, target, output_format)
+        print(
+            f"Converted {input_format} -> {output_format}: "
+            f"{os.path.basename(target)}"
+        )
 
 
 # ---------------------------------------------------------------------------
-# Layer 3 — cross-class mapping conversion
+# Layer 3 prototype — RDF triples to CSV (not yet fully implemented)
 # ---------------------------------------------------------------------------
 
 def convert_rdf_to_csv(
@@ -256,20 +429,23 @@ def convert_rdf_to_csv(
     output_file: str,
     input_format: str,
 ) -> None:
-    """Map RDF triples to a wide CSV table (Layer 3).
+    """Map RDF triples to a wide CSV table (Layer 3 prototype).
 
     Each unique subject becomes a row. Each unique predicate becomes a column.
     Multi-valued predicates are pipe-separated.
-    A companion .meta.json file is generated alongside the CSV to preserve
-    RDF datatype and language tag information for lossless round trips.
+    A companion .meta.json file records RDF datatype and language tag
+    information per predicate column to support quasi-equal round trips.
+
+    NOTE: This is a Layer 3 prototype. It is not yet tested and will be
+    properly implemented in the Layer 3 issue.
 
     Args:
         input_file: Path to input RDF triples file.
         output_file: Path to write output CSV file.
         input_format: Source triple format name (must be in RDF_TRIPLE_FORMATS).
     """
-    g = Graph()
-    g.parse(input_file, format=RDF_TRIPLE_FORMATS[input_format])
+    handler = TripleHandler()
+    g = handler.read(input_file, input_format)
 
     predicates = sorted(set(str(p) for s, p, o in g))
 
@@ -280,7 +456,6 @@ def convert_rdf_to_csv(
         subj = str(s)
         pred = str(p)
 
-        # capture datatype or language tag for companion file
         if hasattr(o, "datatype") and o.datatype:
             column_metadata[pred] = {"datatype": str(o.datatype)}
         elif hasattr(o, "language") and o.language:
@@ -292,15 +467,16 @@ def convert_rdf_to_csv(
             subjects[subj][pred] = []
         subjects[subj][pred].append(str(o))
 
-    with open(output_file, "w", newline="", encoding="utf-8") as f:
-        writer = csv.writer(f)
-        writer.writerow(["resource"] + predicates)
-        for subj, pred_map in subjects.items():
-            row = [subj]
-            for pred in predicates:
-                values = pred_map.get(pred, [])
-                row.append("|".join(values))
-            writer.writerow(row)
+    tsd_handler = TSDHandler()
+    rows = [["resource"] + predicates]
+    for subj, pred_map in subjects.items():
+        row = [subj]
+        for pred in predicates:
+            values = pred_map.get(pred, [])
+            row.append("|".join(values))
+        rows.append(row)
+
+    tsd_handler.write(rows, output_file, "csv")
 
     companion_file = output_file + ".meta.json"
     with open(companion_file, "w", encoding="utf-8") as f:
@@ -314,6 +490,12 @@ def convert_rdf_to_csv(
 # Main dispatcher — called from download pipeline
 # ---------------------------------------------------------------------------
 
+# Handler instances — created once, reused
+_triple_handler = TripleHandler()
+_quad_handler = QuadHandler()
+_tsd_handler = TSDHandler()
+
+
 def convert_file(
     input_file: str,
     output_file: str,
@@ -323,10 +505,7 @@ def convert_file(
 
     Detects the input format from the file extension, determines whether
     this is a Layer 2 (within-class) or Layer 3 (cross-class) conversion,
-    and delegates to the appropriate conversion function.
-
-    For Layer 2: lossless, same equivalence class.
-    For Layer 3: quasi-equal for RDF <-> Tabular, lossless for Triples <-> Quads.
+    and delegates to the appropriate handler.
 
     Args:
         input_file: Path to the input file (must be decompressed).
@@ -334,14 +513,15 @@ def convert_file(
         convert_format: Target format name (CLI format string).
 
     Raises:
-        ValueError: If the input format cannot be detected or if the
-                    requested conversion is not supported.
+        ValueError: If input format cannot be detected or conversion
+                    is not supported.
     """
     input_format = detect_format_from_filename(input_file)
 
     if input_format is None:
         raise ValueError(
-            f"Could not detect input format from filename: '{os.path.basename(input_file)}'. "
+            f"Could not detect input format from filename: "
+            f"'{os.path.basename(input_file)}'. "
             f"Supported extensions: {list(EXTENSION_TO_FORMAT.keys())}"
         )
 
@@ -358,20 +538,20 @@ def convert_file(
     # --- Layer 2: within-class ---
     if input_class == output_class:
         if input_class == "triples":
-            convert_rdf_triple_format(
+            _triple_handler.convert(
                 input_file, output_file, input_format, convert_format
             )
         elif input_class == "quads":
-            convert_rdf_quad_format(
+            _quad_handler.convert(
                 input_file, output_file, input_format, convert_format
             )
         elif input_class == "tabular":
-            convert_tabular_format(
+            _tsd_handler.convert(
                 input_file, output_file, input_format, convert_format
             )
         return
 
-    # --- Layer 3: cross-class ---
+    # --- Layer 3: cross-class (prototype only) ---
     if input_class == "triples" and output_class == "tabular":
         convert_rdf_to_csv(input_file, output_file, input_format)
         return
diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index 74bd2fd..7c33fac 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -520,18 +520,16 @@ def _download_file(
         final_downloaded_file = target_filepath
 
     # --- 8. Convert file format if requested (AFTER compression conversion) ---
+    # Pipeline follows: decompress -> convert format -> recompress
+    # If the source was compressed, the converted output is recompressed:
+    #   - to the format specified by --convert-to if provided
+    #   - to the original compression format otherwise
     if convert_format:
         final_basename = os.path.basename(final_downloaded_file)
         compression_fmt = _detect_compression_format(final_basename)
 
         if compression_fmt:
-            # File is still compressed — decompress to a temp file first,
-            # then convert format, then clean up the temp file.
-            # This follows the pipeline: Download -> Decompress -> Convert -> Save
-            import tempfile
-
-            source_module = COMPRESSION_MODULES[compression_fmt]
-            # temp decompressed file sits next to the original
+            # File is still compressed — decompress to temp, convert, recompress
             compression_ext = COMPRESSION_EXTENSIONS[compression_fmt]
             if final_downloaded_file.lower().endswith(compression_ext):
                 temp_decompressed = final_downloaded_file[:-len(compression_ext)]
@@ -542,6 +540,7 @@ def _download_file(
                 print(
                     f"Decompressing {final_basename} before format conversion..."
                 )
+                source_module = COMPRESSION_MODULES[compression_fmt]
                 with source_module.open(final_downloaded_file, "rb") as sf:
                     with open(temp_decompressed, "wb") as tf:
                         while True:
@@ -550,20 +549,43 @@ def _download_file(
                                 break
                             tf.write(chunk)
 
-                # now convert the decompressed temp file
-                converted_filename = get_converted_filename(
+                # Convert format on the decompressed temp file
+                converted_basename = get_converted_filename(
                     final_basename, convert_format
                 )
-                converted_filepath = os.path.join(localDir, converted_filename)
+                converted_filepath = os.path.join(localDir, converted_basename)
                 convert_file(temp_decompressed, converted_filepath, convert_format)
 
+                # Recompress the converted output.
+                # Use --convert-to format if specified, otherwise use original compression.
+                recompress_fmt = convert_to if convert_to else compression_fmt
+                recompress_ext = COMPRESSION_EXTENSIONS[recompress_fmt]
+                recompressed_filepath = converted_filepath + recompress_ext
+                recompress_module = COMPRESSION_MODULES[recompress_fmt]
+
+                print(
+                    f"Recompressing converted file to {recompress_fmt}: "
+                    f"{os.path.basename(recompressed_filepath)}"
+                )
+                with open(converted_filepath, "rb") as sf:
+                    with recompress_module.open(recompressed_filepath, "wb") as tf:
+                        while True:
+                            chunk = sf.read(8192)
+                            if not chunk:
+                                break
+                            tf.write(chunk)
+
+                # Remove the uncompressed converted file — keep only recompressed
+                if os.path.exists(converted_filepath):
+                    os.remove(converted_filepath)
+
             finally:
-                # always clean up temp file even if conversion fails
+                # Always clean up temp decompressed file
                 if os.path.exists(temp_decompressed):
                     os.remove(temp_decompressed)
 
         else:
-            # file is already uncompressed — convert directly
+            # File is already uncompressed — convert directly, no recompression needed
             converted_filename = get_converted_filename(final_basename, convert_format)
             converted_filepath = os.path.join(localDir, converted_filename)
             convert_file(final_downloaded_file, converted_filepath, convert_format)
diff --git a/pyproject.toml b/pyproject.toml
index 92f479b..72179cc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,3 +29,9 @@ src = ["databusclient", "tests"]
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+filterwarnings = [
+    "ignore::DeprecationWarning:rdflib",
+    "ignore::UserWarning:rdflib",
+]
diff --git a/tests/resources/sample.csv b/tests/resources/sample.csv
new file mode 100644
index 0000000..50dda4c
--- /dev/null
+++ b/tests/resources/sample.csv
@@ -0,0 +1,11 @@
+subject,predicate,object,graph
+https://example.org/data/alice,http://xmlns.com/foaf/0.1/name,Alice,https://example.org/graph/people
+https://example.org/data/alice,https://example.org/vocab/age,29,https://example.org/graph/people
+https://example.org/data/alice,https://example.org/vocab/livesAt,_:address1,https://example.org/graph/people
+_:address1,https://example.org/vocab/city,Leipzig,https://example.org/graph/people
+_:address1,https://example.org/vocab/country,Germany,https://example.org/graph/people
+https://example.org/data/bob,http://xmlns.com/foaf/0.1/name,Bob,https://example.org/graph/people
+https://example.org/data/bob,https://example.org/vocab/age,34,https://example.org/graph/people
+https://example.org/data/bob,https://example.org/vocab/knows,https://example.org/data/alice,https://example.org/graph/people
+https://example.org/data/project1,https://example.org/vocab/title,Databus Example Project,https://example.org/graph/projects
+https://example.org/data/project1,https://example.org/vocab/member,https://example.org/data/alice,https://example.org/graph/projects
\ No newline at end of file
diff --git a/tests/resources/sample.jsonld b/tests/resources/sample.jsonld
new file mode 100644
index 0000000..af80f31
--- /dev/null
+++ b/tests/resources/sample.jsonld
@@ -0,0 +1,62 @@
+{
+  "@context": {
+    "@base": "https://example.org/data/",
+    "ex": "https://example.org/vocab/",
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "name": "foaf:name",
+    "age": {
+      "@id": "ex:age",
+      "@type": "xsd:integer"
+    },
+    "livesAt": {
+      "@id": "ex:livesAt",
+      "@type": "@id"
+    },
+    "city": "ex:city",
+    "country": "ex:country",
+    "knows": {
+      "@id": "ex:knows",
+      "@type": "@id"
+    },
+    "title": "ex:title",
+    "member": {
+      "@id": "ex:member",
+      "@type": "@id"
+    }
+  },
+  "@graph": [
+    {
+      "@id": "https://example.org/graph/people",
+      "@graph": [
+        {
+          "@id": "alice",
+          "name": "Alice",
+          "age": 29,
+          "livesAt": "_:address1"
+        },
+        {
+          "@id": "_:address1",
+          "city": "Leipzig",
+          "country": "Germany"
+        },
+        {
+          "@id": "bob",
+          "name": "Bob",
+          "age": 34,
+          "knows": "alice"
+        }
+      ]
+    },
+    {
+      "@id": "https://example.org/graph/projects",
+      "@graph": [
+        {
+          "@id": "project1",
+          "title": "Databus Example Project",
+          "member": "alice"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/resources/sample.nq b/tests/resources/sample.nq
new file mode 100644
index 0000000..a111652
--- /dev/null
+++ b/tests/resources/sample.nq
@@ -0,0 +1,10 @@
+<https://example.org/data/alice> <http://xmlns.com/foaf/0.1/name> "Alice" <https://example.org/graph/people> .
+<https://example.org/data/alice> <https://example.org/vocab/age> "29"^^<http://www.w3.org/2001/XMLSchema#integer> <https://example.org/graph/people> .
+<https://example.org/data/alice> <https://example.org/vocab/livesAt> _:address1 <https://example.org/graph/people> .
+_:address1 <https://example.org/vocab/city> "Leipzig" <https://example.org/graph/people> .
+_:address1 <https://example.org/vocab/country> "Germany" <https://example.org/graph/people> .
+<https://example.org/data/bob> <http://xmlns.com/foaf/0.1/name> "Bob" <https://example.org/graph/people> .
+<https://example.org/data/bob> <https://example.org/vocab/age> "34"^^<http://www.w3.org/2001/XMLSchema#integer> <https://example.org/graph/people> .
+<https://example.org/data/bob> <https://example.org/vocab/knows> <https://example.org/data/alice> <https://example.org/graph/people> .
+<https://example.org/data/project1> <https://example.org/vocab/title> "Databus Example Project" <https://example.org/graph/projects> .
+<https://example.org/data/project1> <https://example.org/vocab/member> <https://example.org/data/alice> <https://example.org/graph/projects> .
\ No newline at end of file
diff --git a/tests/resources/sample.nt b/tests/resources/sample.nt
new file mode 100644
index 0000000..f6b8488
--- /dev/null
+++ b/tests/resources/sample.nt
@@ -0,0 +1,10 @@
+<https://example.org/data/alice> <http://xmlns.com/foaf/0.1/name> "Alice" .
+<https://example.org/data/alice> <https://example.org/vocab/age> "29"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<https://example.org/data/alice> <https://example.org/vocab/livesAt> _:address1 .
+_:address1 <https://example.org/vocab/city> "Leipzig" .
+_:address1 <https://example.org/vocab/country> "Germany" .
+<https://example.org/data/bob> <http://xmlns.com/foaf/0.1/name> "Bob" .
+<https://example.org/data/bob> <https://example.org/vocab/age> "34"^^<http://www.w3.org/2001/XMLSchema#integer> .
+<https://example.org/data/bob> <https://example.org/vocab/knows> <https://example.org/data/alice> .
+<https://example.org/data/project1> <https://example.org/vocab/title> "Databus Example Project" .
+<https://example.org/data/project1> <https://example.org/vocab/member> <https://example.org/data/alice> .
\ No newline at end of file
diff --git a/tests/resources/sample.rdf b/tests/resources/sample.rdf
new file mode 100644
index 0000000..c8bb09a
--- /dev/null
+++ b/tests/resources/sample.rdf
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF
+    xml:base="https://example.org/data/"
+    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+    xmlns:ex="https://example.org/vocab/"
+    xmlns:foaf="http://xmlns.com/foaf/0.1/">
+
+  <rdf:Description rdf:about="alice">
+    <foaf:name>Alice</foaf:name>
+    <ex:age rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">29</ex:age>
+    <ex:livesAt rdf:nodeID="address1"/>
+  </rdf:Description>
+
+  <rdf:Description rdf:nodeID="address1">
+    <ex:city>Leipzig</ex:city>
+    <ex:country>Germany</ex:country>
+  </rdf:Description>
+
+  <rdf:Description rdf:about="bob">
+    <foaf:name>Bob</foaf:name>
+    <ex:age rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">34</ex:age>
+    <ex:knows rdf:resource="alice"/>
+  </rdf:Description>
+
+  <rdf:Description rdf:about="project1">
+    <ex:title>Databus Example Project</ex:title>
+    <ex:member rdf:resource="alice"/>
+  </rdf:Description>
+
+</rdf:RDF>
\ No newline at end of file
diff --git a/tests/resources/sample.trig b/tests/resources/sample.trig
new file mode 100644
index 0000000..e4abc3f
--- /dev/null
+++ b/tests/resources/sample.trig
@@ -0,0 +1,22 @@
+@base <https://example.org/data/> .
+@prefix ex: <https://example.org/vocab/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<https://example.org/graph/people> {
+  <alice> foaf:name "Alice" ;
+      ex:age 29 ;
+      ex:livesAt _:address1 .
+
+  _:address1 ex:city "Leipzig" ;
+      ex:country "Germany" .
+
+  <bob> foaf:name "Bob" ;
+      ex:age 34 ;
+      ex:knows <alice> .
+}
+
+<https://example.org/graph/projects> {
+  <project1> ex:title "Databus Example Project" ;
+      ex:member <alice> .
+}
\ No newline at end of file
diff --git a/tests/resources/sample.trix b/tests/resources/sample.trix
new file mode 100644
index 0000000..d8edb13
--- /dev/null
+++ b/tests/resources/sample.trix
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<TriX xmlns="http://www.w3.org/2004/03/trix/trix-1/">
+
+  <graph>
+    <uri>https://example.org/graph/people</uri>
+
+    <triple>
+      <uri>https://example.org/data/alice</uri>
+      <uri>http://xmlns.com/foaf/0.1/name</uri>
+      <plainLiteral>Alice</plainLiteral>
+    </triple>
+
+    <triple>
+      <uri>https://example.org/data/alice</uri>
+      <uri>https://example.org/vocab/age</uri>
+      <typedLiteral datatype="http://www.w3.org/2001/XMLSchema#integer">29</typedLiteral>
+    </triple>
+
+    <triple>
+      <uri>https://example.org/data/alice</uri>
+      <uri>https://example.org/vocab/livesAt</uri>
+      <id>address1</id>
+    </triple>
+
+    <triple>
+      <id>address1</id>
+      <uri>https://example.org/vocab/city</uri>
+      <plainLiteral>Leipzig</plainLiteral>
+    </triple>
+
+    <triple>
+      <id>address1</id>
+      <uri>https://example.org/vocab/country</uri>
+      <plainLiteral>Germany</plainLiteral>
+    </triple>
+
+    <triple>
+      <uri>https://example.org/data/bob</uri>
+      <uri>http://xmlns.com/foaf/0.1/name</uri>
+      <plainLiteral>Bob</plainLiteral>
+    </triple>
+
+    <triple>
+      <uri>https://example.org/data/bob</uri>
+      <uri>https://example.org/vocab/age</uri>
+      <typedLiteral datatype="http://www.w3.org/2001/XMLSchema#integer">34</typedLiteral>
+    </triple>
+
+    <triple>
+      <uri>https://example.org/data/bob</uri>
+      <uri>https://example.org/vocab/knows</uri>
+      <uri>https://example.org/data/alice</uri>
+    </triple>
+  </graph>
+
+  <graph>
+    <uri>https://example.org/graph/projects</uri>
+
+    <triple>
+      <uri>https://example.org/data/project1</uri>
+      <uri>https://example.org/vocab/title</uri>
+      <plainLiteral>Databus Example Project</plainLiteral>
+    </triple>
+
+    <triple>
+      <uri>https://example.org/data/project1</uri>
+      <uri>https://example.org/vocab/member</uri>
+      <uri>https://example.org/data/alice</uri>
+    </triple>
+  </graph>
+
+</TriX>
\ No newline at end of file
diff --git a/tests/resources/sample.tsv b/tests/resources/sample.tsv
new file mode 100644
index 0000000..c23af40
--- /dev/null
+++ b/tests/resources/sample.tsv
@@ -0,0 +1,11 @@
+subject	predicate	object	graph
+https://example.org/data/alice	http://xmlns.com/foaf/0.1/name	Alice	https://example.org/graph/people
+https://example.org/data/alice	https://example.org/vocab/age	29	https://example.org/graph/people
+https://example.org/data/alice	https://example.org/vocab/livesAt	_:address1	https://example.org/graph/people
+_:address1	https://example.org/vocab/city	Leipzig	https://example.org/graph/people
+_:address1	https://example.org/vocab/country	Germany	https://example.org/graph/people
+https://example.org/data/bob	http://xmlns.com/foaf/0.1/name	Bob	https://example.org/graph/people
+https://example.org/data/bob	https://example.org/vocab/age	34	https://example.org/graph/people
+https://example.org/data/bob	https://example.org/vocab/knows	https://example.org/data/alice	https://example.org/graph/people
+https://example.org/data/project1	https://example.org/vocab/title	Databus Example Project	https://example.org/graph/projects
+https://example.org/data/project1	https://example.org/vocab/member	https://example.org/data/alice	https://example.org/graph/projects
\ No newline at end of file
diff --git a/tests/resources/sample.ttl b/tests/resources/sample.ttl
new file mode 100644
index 0000000..a8eb198
--- /dev/null
+++ b/tests/resources/sample.ttl
@@ -0,0 +1,18 @@
+@base <https://example.org/data/> .
+@prefix ex: <https://example.org/vocab/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<alice> foaf:name "Alice" ;
+    ex:age 29 ;
+    ex:livesAt _:address1 .
+
+_:address1 ex:city "Leipzig" ;
+    ex:country "Germany" .
+
+<bob> foaf:name "Bob" ;
+    ex:age 34 ;
+    ex:knows <alice> .
+
+<project1> ex:title "Databus Example Project" ;
+    ex:member <alice> .
\ No newline at end of file
diff --git a/tests/test_conversion.py b/tests/test_conversion.py
deleted file mode 100644
index 7ce710c..0000000
--- a/tests/test_conversion.py
+++ /dev/null
@@ -1,309 +0,0 @@
-"""Round trip tests for Layer 2 format conversion.
-
-Following the strategy from Frey et al., each test validates that
-reading a format and writing it back produces semantically identical output.
-Pattern: parse(format X) -> serialize(format X) -> parse again -> compare.
-
-9 tests total:
-- Triple formats: ntriples, turtle, rdf-xml          (3 tests)
-- Quad formats:   nquads, trig, trix, json-ld        (4 tests)
-- Tabular formats: csv, tsv                           (2 tests)
-"""
-
-import csv
-import os
-import tempfile
-
-from rdflib import Dataset, Graph
-
-from databusclient.api.convert import (
-    convert_rdf_quad_format,
-    convert_rdf_triple_format,
-    convert_tabular_format,
-)
-
-# ---------------------------------------------------------------------------
-# Sample RDF data used across all RDF tests
-# ---------------------------------------------------------------------------
-
-SAMPLE_TURTLE = """
-@prefix ex: <http://example.org/> .
-@prefix schema: <http://schema.org/> .
-@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
-
-ex:Paris schema:isCapitalOf ex:France ;
-         schema:population "2161000"^^xsd:integer .
-
-ex:Berlin schema:isCapitalOf ex:Germany ;
-          schema:population "3645000"^^xsd:integer .
-"""
-
-SAMPLE_NQUADS = """
-<http://example.org/Paris> <http://schema.org/isCapitalOf> <http://example.org/France> <http://example.org/graph1> .
-<http://example.org/Berlin> <http://schema.org/isCapitalOf> <http://example.org/Germany> <http://example.org/graph1> .
-<http://example.org/Rome> <http://schema.org/isCapitalOf> <http://example.org/Italy> <http://example.org/graph2> .
-"""
-
-SAMPLE_CSV = """resource,name,population
-http://example.org/Paris,Paris,2161000
-http://example.org/Berlin,Berlin,3645000
-"""
-
-SAMPLE_TSV = "resource\tname\tpopulation\nhttp://example.org/Paris\tParis\t2161000\nhttp://example.org/Berlin\tBerlin\t3645000\n"
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _write_temp(content: str, suffix: str) -> str:
-    """Write content to a named temp file and return its path."""
-    fd, path = tempfile.mkstemp(suffix=suffix)
-    with os.fdopen(fd, "w", encoding="utf-8") as f:
-        f.write(content)
-    return path
-
-
-def _graphs_are_isomorphic(g1: Graph, g2: Graph) -> bool:
-    """Check semantic equivalence of two rdflib Graphs."""
-    return g1.isomorphic(g2)
-
-
-def _datasets_equal(g1: Dataset, g2: Dataset) -> bool:
-    """Check semantic equivalence of two Datasets by triple count and graph names."""
-    if len(g1) != len(g2):
-        return False
-    graphs1 = {str(c.identifier) for c in g1.graphs()}
-    graphs2 = {str(c.identifier) for c in g2.graphs()}
-    return graphs1 == graphs2
-
-
-# ---------------------------------------------------------------------------
-# Triple format round trip tests (Layer 2)
-# ---------------------------------------------------------------------------
-
-def test_round_trip_turtle():
-    """Turtle -> Turtle: parse, serialize, reparse, compare."""
-    input_path = _write_temp(SAMPLE_TURTLE, ".ttl")
-    output_path = input_path + ".rt.ttl"
-
-    try:
-        convert_rdf_triple_format(input_path, output_path, "turtle", "turtle")
-
-        g_original = Graph()
-        g_original.parse(input_path, format="turtle")
-
-        g_roundtrip = Graph()
-        g_roundtrip.parse(output_path, format="turtle")
-
-        assert _graphs_are_isomorphic(g_original, g_roundtrip), (
-            "Turtle round trip failed: graphs are not isomorphic"
-        )
-    finally:
-        for p in (input_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-def test_round_trip_ntriples():
-    """N-Triples -> N-Triples: parse, serialize, reparse, compare."""
-    # first produce an ntriples file from turtle
-    turtle_path = _write_temp(SAMPLE_TURTLE, ".ttl")
-    nt_path = turtle_path + ".nt"
-    output_path = nt_path + ".rt.nt"
-
-    try:
-        convert_rdf_triple_format(turtle_path, nt_path, "turtle", "ntriples")
-        convert_rdf_triple_format(nt_path, output_path, "ntriples", "ntriples")
-
-        g_original = Graph()
-        g_original.parse(nt_path, format="ntriples")
-
-        g_roundtrip = Graph()
-        g_roundtrip.parse(output_path, format="ntriples")
-
-        assert _graphs_are_isomorphic(g_original, g_roundtrip), (
-            "N-Triples round trip failed: graphs are not isomorphic"
-        )
-    finally:
-        for p in (turtle_path, nt_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-def test_round_trip_rdf_xml():
-    """RDF/XML -> RDF/XML: parse, serialize, reparse, compare."""
-    turtle_path = _write_temp(SAMPLE_TURTLE, ".ttl")
-    rdf_path = turtle_path + ".rdf"
-    output_path = rdf_path + ".rt.rdf"
-
-    try:
-        convert_rdf_triple_format(turtle_path, rdf_path, "turtle", "rdf-xml")
-        convert_rdf_triple_format(rdf_path, output_path, "rdf-xml", "rdf-xml")
-
-        g_original = Graph()
-        g_original.parse(rdf_path, format="xml")
-
-        g_roundtrip = Graph()
-        g_roundtrip.parse(output_path, format="xml")
-
-        assert _graphs_are_isomorphic(g_original, g_roundtrip), (
-            "RDF/XML round trip failed: graphs are not isomorphic"
-        )
-    finally:
-        for p in (turtle_path, rdf_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-# ---------------------------------------------------------------------------
-# Quad format round trip tests (Layer 2)
-# ---------------------------------------------------------------------------
-
-def test_round_trip_nquads():
-    """N-Quads -> N-Quads: parse, serialize, reparse, compare."""
-    input_path = _write_temp(SAMPLE_NQUADS, ".nq")
-    output_path = input_path + ".rt.nq"
-
-    try:
-        convert_rdf_quad_format(input_path, output_path, "nquads", "nquads")
-
-        g_original = Dataset()
-        g_original.parse(input_path, format="nquads")
-
-        g_roundtrip = Dataset()
-        g_roundtrip.parse(output_path, format="nquads")
-
-        assert _datasets_equal(g_original, g_roundtrip), (
-            "N-Quads round trip failed: graphs are not equal"
-        )
-    finally:
-        for p in (input_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-def test_round_trip_trig():
-    """TriG -> TriG: parse, serialize, reparse, compare."""
-    # produce trig from nquads
-    nq_path = _write_temp(SAMPLE_NQUADS, ".nq")
-    trig_path = nq_path + ".trig"
-    output_path = trig_path + ".rt.trig"
-
-    try:
-        convert_rdf_quad_format(nq_path, trig_path, "nquads", "trig")
-        convert_rdf_quad_format(trig_path, output_path, "trig", "trig")
-
-        g_original = Dataset()
-        g_original.parse(trig_path, format="trig")
-
-        g_roundtrip = Dataset()
-        g_roundtrip.parse(output_path, format="trig")
-
-        assert _datasets_equal(g_original, g_roundtrip), (
-            "TriG round trip failed: graphs are not equal"
-        )
-    finally:
-        for p in (nq_path, trig_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-def test_round_trip_trix():
-    """TriX -> TriX: parse, serialize, reparse, compare."""
-    nq_path = _write_temp(SAMPLE_NQUADS, ".nq")
-    trix_path = nq_path + ".trix"
-    output_path = trix_path + ".rt.trix"
-
-    try:
-        convert_rdf_quad_format(nq_path, trix_path, "nquads", "trix")
-        convert_rdf_quad_format(trix_path, output_path, "trix", "trix")
-
-        g_original = Dataset()
-        g_original.parse(trix_path, format="trix")
-
-        g_roundtrip = Dataset()
-        g_roundtrip.parse(output_path, format="trix")
-
-        assert _datasets_equal(g_original, g_roundtrip), (
-            "TriX round trip failed: graphs are not equal"
-        )
-    finally:
-        for p in (nq_path, trix_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-def test_round_trip_json_ld():
-    """JSON-LD -> JSON-LD: parse, serialize, reparse, compare."""
-    nq_path = _write_temp(SAMPLE_NQUADS, ".nq")
-    jsonld_path = nq_path + ".jsonld"
-    output_path = jsonld_path + ".rt.jsonld"
-
-    try:
-        convert_rdf_quad_format(nq_path, jsonld_path, "nquads", "json-ld")
-        convert_rdf_quad_format(jsonld_path, output_path, "json-ld", "json-ld")
-
-        g_original = Dataset()
-        g_original.parse(jsonld_path, format="json-ld")
-
-        g_roundtrip = Dataset()
-        g_roundtrip.parse(output_path, format="json-ld")
-
-        assert _datasets_equal(g_original, g_roundtrip), (
-            "JSON-LD round trip failed: graphs are not equal"
-        )
-    finally:
-        for p in (nq_path, jsonld_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-# ---------------------------------------------------------------------------
-# Tabular format round trip tests (Layer 2)
-# ---------------------------------------------------------------------------
-
-def test_round_trip_csv():
-    """CSV -> CSV: read, write, reread, compare rows."""
-    input_path = _write_temp(SAMPLE_CSV, ".csv")
-    output_path = input_path + ".rt.csv"
-
-    try:
-        convert_tabular_format(input_path, output_path, "csv", "csv")
-
-        with open(input_path, newline="", encoding="utf-8") as f:
-            original_rows = list(csv.reader(f))
-
-        with open(output_path, newline="", encoding="utf-8") as f:
-            roundtrip_rows = list(csv.reader(f))
-
-        assert original_rows == roundtrip_rows, (
-            "CSV round trip failed: rows do not match"
-        )
-    finally:
-        for p in (input_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
-
-
-def test_round_trip_tsv():
-    """TSV -> TSV: read, write, reread, compare rows."""
-    input_path = _write_temp(SAMPLE_TSV, ".tsv")
-    output_path = input_path + ".rt.tsv"
-
-    try:
-        convert_tabular_format(input_path, output_path, "tsv", "tsv")
-
-        with open(input_path, newline="", encoding="utf-8") as f:
-            original_rows = list(csv.reader(f, delimiter="\t"))
-
-        with open(output_path, newline="", encoding="utf-8") as f:
-            roundtrip_rows = list(csv.reader(f, delimiter="\t"))
-
-        assert original_rows == roundtrip_rows, (
-            "TSV round trip failed: rows do not match"
-        )
-    finally:
-        for p in (input_path, output_path):
-            if os.path.exists(p):
-                os.remove(p)
\ No newline at end of file
diff --git a/tests/test_format_round_trips.py b/tests/test_format_round_trips.py
new file mode 100644
index 0000000..ebe9f76
--- /dev/null
+++ b/tests/test_format_round_trips.py
@@ -0,0 +1,256 @@
+"""Round trip tests for Layer 2 format conversion.
+
+Following the strategy from Frey et al., each test validates that
+reading a format and writing it back produces semantically identical output.
+
+The key validation pattern using handlers and IR:
+    1. Read original file into IR (Graph/Dataset/rows) BEFORE any conversion
+    2. Convert the file through the handler (read -> write cycle)
+    3. Read the converted output back into IR
+    4. Compare both IRs — if conversion lost data, IRs will differ
+
+This correctly catches information loss because g_original is captured
+BEFORE serialization, not after. Both IRs use the same rdflib internal
+representation, making comparison meaningful at the data level.
+
+Test data lives in tests/resources/ — one sample file per format.
+These files are semantically consistent (same people dataset across
+all formats) and are shared across Layer 2 and future Layer 3 tests.
+
+9 round trip tests total:
+    Triple formats: turtle, ntriples, rdf-xml             (3 tests)
+    Quad formats:   nquads, trig, trix, json-ld           (4 tests)
+    Tabular formats: csv, tsv                              (2 tests)
+"""
+
+import os
+import tempfile
+
+from databusclient.api.convert import (
+    QuadHandler,
+    TSDHandler,
+    TripleHandler,
+)
+
+# ---------------------------------------------------------------------------
+# Path to shared test resources
+# ---------------------------------------------------------------------------
+
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
+
+
+def resource(filename: str) -> str:
+    """Return absolute path to a file in tests/resources/."""
+    return os.path.join(RESOURCES, filename)
+
+
+# ---------------------------------------------------------------------------
+# Handler instances shared across tests
+# ---------------------------------------------------------------------------
+
+triple_handler = TripleHandler()
+quad_handler = QuadHandler()
+tsd_handler = TSDHandler()
+
+
+# ---------------------------------------------------------------------------
+# Triple format round trip tests (Layer 2)
+# ---------------------------------------------------------------------------
+
+def test_round_trip_turtle():
+    """Turtle -> Turtle: read into IR before conversion, compare after."""
+    source = resource("sample.ttl")
+    g_original = triple_handler.read(source, "turtle")
+
+    with tempfile.NamedTemporaryFile(suffix=".ttl", delete=False) as f:
+        output = f.name
+    try:
+        triple_handler.convert(source, output, "turtle", "turtle")
+        g_roundtrip = triple_handler.read(output, "turtle")
+        assert g_original.isomorphic(g_roundtrip), (
+            "Turtle round trip failed: graphs are not isomorphic"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+def test_round_trip_ntriples():
+    """N-Triples -> N-Triples: read into IR before conversion, compare after."""
+    source = resource("sample.nt")
+    g_original = triple_handler.read(source, "ntriples")
+
+    with tempfile.NamedTemporaryFile(suffix=".nt", delete=False) as f:
+        output = f.name
+    try:
+        triple_handler.convert(source, output, "ntriples", "ntriples")
+        g_roundtrip = triple_handler.read(output, "ntriples")
+        assert g_original.isomorphic(g_roundtrip), (
+            "N-Triples round trip failed: graphs are not isomorphic"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+def test_round_trip_rdf_xml():
+    """RDF/XML -> RDF/XML: read into IR before conversion, compare after."""
+    source = resource("sample.rdf")
+    g_original = triple_handler.read(source, "rdf-xml")
+
+    with tempfile.NamedTemporaryFile(suffix=".rdf", delete=False) as f:
+        output = f.name
+    try:
+        triple_handler.convert(source, output, "rdf-xml", "rdf-xml")
+        g_roundtrip = triple_handler.read(output, "rdf-xml")
+        assert g_original.isomorphic(g_roundtrip), (
+            "RDF/XML round trip failed: graphs are not isomorphic"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+# ---------------------------------------------------------------------------
+# Quad format round trip tests (Layer 2)
+# ---------------------------------------------------------------------------
+
+def _datasets_equal(d1, d2) -> bool:
+    """Check semantic equivalence of two Datasets.
+
+    Compares len() of both datasets, the set of graph identifiers, and
+    performs an isomorphism check on each graph to correctly handle
+    blank node renaming during serialization.
+
+    Args:
+        d1: Dataset read before conversion.
+        d2: Dataset read back after conversion.
+
+    Returns:
+        True if sizes, graph identifiers and per-graph triples match.
+    """
+    if len(d1) != len(d2):
+        return False
+
+    graphs1 = {str(g.identifier) for g in d1.graphs()}
+    graphs2 = {str(g.identifier) for g in d2.graphs()}
+    if graphs1 != graphs2:
+        return False
+
+    # get_context() always returns a Graph (never None), so no None
+    # check is needed; the identifier sets were already compared above.
+    return all(
+        g1.isomorphic(d2.get_context(g1.identifier)) for g1 in d1.graphs()
+    )
+
+
+def test_round_trip_nquads():
+    """N-Quads -> N-Quads: read into IR before conversion, compare after."""
+    source = resource("sample.nq")
+    d_original = quad_handler.read(source, "nquads")
+
+    with tempfile.NamedTemporaryFile(suffix=".nq", delete=False) as f:
+        output = f.name
+    try:
+        quad_handler.convert(source, output, "nquads", "nquads")
+        d_roundtrip = quad_handler.read(output, "nquads")
+        assert _datasets_equal(d_original, d_roundtrip), (
+            "N-Quads round trip failed: datasets are not equal"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+def test_round_trip_trig():
+    """TriG -> TriG: read into IR before conversion, compare after."""
+    source = resource("sample.trig")
+    d_original = quad_handler.read(source, "trig")
+
+    with tempfile.NamedTemporaryFile(suffix=".trig", delete=False) as f:
+        output = f.name
+    try:
+        quad_handler.convert(source, output, "trig", "trig")
+        d_roundtrip = quad_handler.read(output, "trig")
+        assert _datasets_equal(d_original, d_roundtrip), (
+            "TriG round trip failed: datasets are not equal"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+def test_round_trip_trix():
+    """TriX -> TriX: read into IR before conversion, compare after."""
+    source = resource("sample.trix")
+    d_original = quad_handler.read(source, "trix")
+
+    with tempfile.NamedTemporaryFile(suffix=".trix", delete=False) as f:
+        output = f.name
+    try:
+        quad_handler.convert(source, output, "trix", "trix")
+        d_roundtrip = quad_handler.read(output, "trix")
+        assert _datasets_equal(d_original, d_roundtrip), (
+            "TriX round trip failed: datasets are not equal"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+def test_round_trip_json_ld():
+    """JSON-LD -> JSON-LD: read into IR before conversion, compare after."""
+    source = resource("sample.jsonld")
+    d_original = quad_handler.read(source, "json-ld")
+
+    with tempfile.NamedTemporaryFile(suffix=".jsonld", delete=False) as f:
+        output = f.name
+    try:
+        quad_handler.convert(source, output, "json-ld", "json-ld")
+        d_roundtrip = quad_handler.read(output, "json-ld")
+        assert _datasets_equal(d_original, d_roundtrip), (
+            "JSON-LD round trip failed: datasets are not equal"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+# ---------------------------------------------------------------------------
+# Tabular format round trip tests (Layer 2)
+# ---------------------------------------------------------------------------
+
+def test_round_trip_csv():
+    """CSV -> CSV: read into IR before conversion, compare after."""
+    source = resource("sample.csv")
+    rows_original = tsd_handler.read(source, "csv")
+
+    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
+        output = f.name
+    try:
+        tsd_handler.convert(source, output, "csv", "csv")
+        rows_roundtrip = tsd_handler.read(output, "csv")
+        assert rows_original == rows_roundtrip, (
+            "CSV round trip failed: rows do not match"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
+
+
+def test_round_trip_tsv():
+    """TSV -> TSV: read into IR before conversion, compare after."""
+    source = resource("sample.tsv")
+    rows_original = tsd_handler.read(source, "tsv")
+
+    with tempfile.NamedTemporaryFile(suffix=".tsv", delete=False) as f:
+        output = f.name
+    try:
+        tsd_handler.convert(source, output, "tsv", "tsv")
+        rows_roundtrip = tsd_handler.read(output, "tsv")
+        assert rows_original == rows_roundtrip, (
+            "TSV round trip failed: rows do not match"
+        )
+    finally:
+        if os.path.exists(output):
+            os.remove(output)
\ No newline at end of file

From f2fe92eddf6540ca8880865230020a70b660f0ec Mon Sep 17 00:00:00 2001
From: DhanashreePetare <dhanashreepetare8125@gmail.com>
Date: Thu, 11 Jun 2026 01:42:48 +0530
Subject: [PATCH 3/7] Improvements to Format layer implementation

---
 databusclient/api/convert.py          | 589 ++------------------------
 databusclient/api/download.py         | 173 ++++----
 databusclient/filehandling/format.py  | 511 ++++++++++++++++++++++
 databusclient/filehandling/mapping.py |  68 +++
 pyproject.toml                        |   3 +
 run_all_conversion_tests.py           |   4 +
 6 files changed, 719 insertions(+), 629 deletions(-)
 create mode 100644 databusclient/filehandling/format.py
 create mode 100644 databusclient/filehandling/mapping.py

diff --git a/databusclient/api/convert.py b/databusclient/api/convert.py
index 8bd6dbb..b095eaf 100644
--- a/databusclient/api/convert.py
+++ b/databusclient/api/convert.py
@@ -1,563 +1,50 @@
-"""Format and Mapping Conversion Layer.
-
-This module implements the format conversion pipeline for the Databus Python Client
-
-Layer 2: Within-class format conversion (lossless).
-    - TripleHandler: RDF triple formats (turtle, ntriples, rdf-xml)
-    - QuadHandler:   RDF quad formats (nquads, trig, trix, json-ld)
-    - TSDHandler:    Tabular formats (csv, tsv)
-
-Layer 3 (prototype, not yet fully implemented):
-    - RDF triples -> CSV/TSV (quasi-equal, companion metadata generated)
-
-Each handler provides read() -> IR, write(IR) -> file, convert() -> chains both.
-The IR (intermediate representation) returned by read() is designed to be passed
-to future mapping classes (TripleToQuadMapper, TripleToTSDMapper, etc.).
-"""
-
-import csv
-import json
-import os
-import warnings
-from typing import Optional
-
-# Suppress rdflib internal DeprecationWarning for Dataset API.
-# rdflib is mid-migration from ConjunctiveGraph to Dataset in 7.x.
-# These warnings originate from rdflib internals, not our code.
-# Can be removed when rdflib completes their Dataset API migration.
-warnings.filterwarnings("ignore", category=DeprecationWarning, module="rdflib")
-warnings.filterwarnings("ignore", category=UserWarning, module="rdflib")
-
-from rdflib import Dataset, Graph
-
-
-# ---------------------------------------------------------------------------
-# Format registries
-# ---------------------------------------------------------------------------
-
-# Maps CLI format name -> rdflib format string
-RDF_TRIPLE_FORMATS = {
-    "ntriples": "ntriples",
-    "turtle": "turtle",
-    "rdf-xml": "xml",
-}
-
-RDF_QUAD_FORMATS = {
-    "nquads": "nquads",
-    "trig": "trig",
-    "trix": "trix",
-    "json-ld": "json-ld",
-}
-
-TABULAR_FORMATS = {
-    "csv": ",",
-    "tsv": "\t",
-}
-
-ALL_FORMATS = (
-    list(RDF_TRIPLE_FORMATS)
-    + list(RDF_QUAD_FORMATS)
-    + list(TABULAR_FORMATS)
+from databusclient.filehandling.format import convert_file, get_converted_filename
+from databusclient.filehandling import mapping as _mapping
+
+from databusclient.filehandling.format import (  # noqa: F401
+    ALL_FORMATS,
+    EXTENSION_TO_FORMAT,
+    FORMAT_TO_EXTENSION,
+    RDF_QUAD_FORMATS,
+    RDF_TRIPLE_FORMATS,
+    TABULAR_FORMATS,
+    QuadHandler,
+    TSDHandler,
+    TripleHandler,
+    _quad_handler,
+    _tsd_handler,
+    _triple_handler,
+    detect_format_from_filename,
+    get_format_class,
 )
 
-# Maps file extension -> CLI format name
-EXTENSION_TO_FORMAT = {
-    ".ttl": "turtle",
-    ".nt": "ntriples",
-    ".rdf": "rdf-xml",
-    ".xml": "rdf-xml",
-    ".owl": "rdf-xml",
-    ".nq": "nquads",
-    ".trig": "trig",
-    ".trix": "trix",
-    ".jsonld": "json-ld",
-    ".json": "json-ld",
-    ".csv": "csv",
-    ".tsv": "tsv",
-}
-
-# Maps format name -> file extension
-FORMAT_TO_EXTENSION = {
-    "ntriples": ".nt",
-    "turtle": ".ttl",
-    "rdf-xml": ".rdf",
-    "nquads": ".nq",
-    "trig": ".trig",
-    "trix": ".trix",
-    "json-ld": ".jsonld",
-    "csv": ".csv",
-    "tsv": ".tsv",
-}
-
-
-# ---------------------------------------------------------------------------
-# Format detection helpers
-# ---------------------------------------------------------------------------
-
-def detect_format_from_filename(filename: str) -> Optional[str]:
-    """Detect format from file extension, ignoring compression extensions.
-
-    Args:
-        filename: File name or path.
-
-    Returns:
-        Format name string or None if not detectable.
-    """
-    name = filename.lower()
-
-    # strip compression extension first
-    for ext in (".bz2", ".gz", ".xz"):
-        if name.endswith(ext):
-            name = name[: -len(ext)]
-            break
-
-    # match longest extension first to avoid .json matching before .jsonld
-    for ext in sorted(EXTENSION_TO_FORMAT.keys(), key=len, reverse=True):
-        if name.endswith(ext):
-            return EXTENSION_TO_FORMAT[ext]
-
-    return None
-
-
-def get_format_class(fmt: str) -> str:
-    """Return equivalence class for a format name.
-
-    Args:
-        fmt: Format name (e.g. 'turtle', 'nquads', 'csv').
-
-    Returns:
-        'triples', 'quads', or 'tabular'.
-
-    Raises:
-        ValueError: If format is not recognised.
-    """
-    if fmt in RDF_TRIPLE_FORMATS:
-        return "triples"
-    if fmt in RDF_QUAD_FORMATS:
-        return "quads"
-    if fmt in TABULAR_FORMATS:
-        return "tabular"
-    raise ValueError(
-        f"Unknown format: '{fmt}'. Supported formats: {ALL_FORMATS}"
-    )
-
-
-def get_converted_filename(original_filename: str, convert_format: str) -> str:
-    """Generate output filename after format conversion.
-
-    Strips compression extension if present, then replaces the format
-    extension with the target format extension.
-
-    Args:
-        original_filename: Original file name (basename only, not full path).
-        convert_format: Target format name.
-
-    Returns:
-        New filename with updated extension.
-    """
-    name = original_filename
-
-    # strip compression extension
-    for ext in (".bz2", ".gz", ".xz"):
-        if name.lower().endswith(ext):
-            name = name[: -len(ext)]
-            break
-
-    # strip existing format extension (longest first)
-    for old_ext in sorted(FORMAT_TO_EXTENSION.values(), key=len, reverse=True):
-        if name.lower().endswith(old_ext):
-            name = name[: -len(old_ext)]
-            break
-
-    target_ext = FORMAT_TO_EXTENSION.get(convert_format, f".{convert_format}")
-    return name + target_ext
-
-
-# ---------------------------------------------------------------------------
-# Layer 2 Handlers
-# ---------------------------------------------------------------------------
-
-class TripleHandler:
-    """Handler for RDF triple formats (Layer 2).
-
-    Uses rdflib.Graph as the intermediate representation (IR).
-    Supports: ntriples, turtle, rdf-xml.
-
-    The IR returned by read() can be passed to future mapping classes
-    such as TripleToQuadMapper or TripleToTSDMapper for Layer 3 conversions.
-    """
-
-    def read(self, source: str, input_format: str) -> Graph:
-        """Parse an RDF triples file into a Graph (IR).
-
-        Args:
-            source: Path to input file.
-            input_format: Source format name (e.g. 'turtle', 'ntriples', 'rdf-xml').
-
-        Returns:
-            rdflib.Graph containing all parsed triples.
-
-        Raises:
-            ValueError: If input_format is not a recognised triple format.
-        """
-        if input_format not in RDF_TRIPLE_FORMATS:
-            raise ValueError(
-                f"'{input_format}' is not a triple format. "
-                f"Supported: {list(RDF_TRIPLE_FORMATS)}"
-            )
-        g = Graph()
-        g.parse(source, format=RDF_TRIPLE_FORMATS[input_format])
-        return g
-
-    def write(self, data: Graph, target: str, output_format: str) -> None:
-        """Serialize a Graph (IR) to a file.
-
-        Args:
-            data: rdflib.Graph to serialize.
-            target: Path to output file.
-            output_format: Target format name (e.g. 'ntriples', 'turtle').
-
-        Raises:
-            ValueError: If output_format is not a recognised triple format.
-        """
-        if output_format not in RDF_TRIPLE_FORMATS:
-            raise ValueError(
-                f"'{output_format}' is not a triple format. "
-                f"Supported: {list(RDF_TRIPLE_FORMATS)}"
-            )
-        # Explicitly specify utf-8 encoding to avoid NTSerializer warning
-        data.serialize(
-            destination=target,
-            format=RDF_TRIPLE_FORMATS[output_format],
-            encoding="utf-8",
-        )
-
-    def convert(
-        self,
-        source: str,
-        target: str,
-        input_format: str,
-        output_format: str,
-    ) -> None:
-        """Convert between RDF triple formats (Layer 2, lossless).
-
-        Chains read() -> write(). Both formats must be in the same
-        equivalence class (RDF triples).
-
-        Args:
-            source: Path to input file.
-            target: Path to output file.
-            input_format: Source format name.
-            output_format: Target format name.
-        """
-        graph = self.read(source, input_format)
-        self.write(graph, target, output_format)
-        print(
-            f"Converted {input_format} -> {output_format}: "
-            f"{os.path.basename(target)}"
-        )
-
-
-class QuadHandler:
-    """Handler for RDF quad formats (Layer 2).
+__all__ = ["convert_file", "get_converted_filename"]
 
-    Uses rdflib.Dataset as the intermediate representation (IR).
-    Supports: nquads, trig, trix, json-ld.
+convert_rdf_to_csv = _mapping.convert_rdf_to_csv
 
-    Named graph information is preserved through the Dataset IR.
-    The IR returned by read() can be passed to future mapping classes
-    such as QuadToTripleMapper or QuadToTSDMapper for Layer 3 conversions.
-    """
 
-    def read(self, source: str, input_format: str) -> Dataset:
-        """Parse an RDF quads file into a Dataset (IR).
-
-        Args:
-            source: Path to input file.
-            input_format: Source format name (e.g. 'nquads', 'trig', 'trix', 'json-ld').
-
-        Returns:
-            rdflib.Dataset containing all parsed quads with named graphs.
-
-        Raises:
-            ValueError: If input_format is not a recognised quad format.
-        """
-        if input_format not in RDF_QUAD_FORMATS:
-            raise ValueError(
-                f"'{input_format}' is not a quad format. "
-                f"Supported: {list(RDF_QUAD_FORMATS)}"
-            )
-        d = Dataset()
-        d.parse(source, format=RDF_QUAD_FORMATS[input_format])
-        return d
-
-    def write(self, data: Dataset, target: str, output_format: str) -> None:
-        """Serialize a Dataset (IR) to a file.
-
-        Args:
-            data: rdflib.Dataset to serialize.
-            target: Path to output file.
-            output_format: Target format name.
-
-        Raises:
-            ValueError: If output_format is not a recognised quad format.
-        """
-        if output_format not in RDF_QUAD_FORMATS:
-            raise ValueError(
-                f"'{output_format}' is not a quad format. "
-                f"Supported: {list(RDF_QUAD_FORMATS)}"
-            )
-        data.serialize(
-            destination=target,
-            format=RDF_QUAD_FORMATS[output_format],
-        )
-
-    def convert(
-        self,
-        source: str,
-        target: str,
-        input_format: str,
-        output_format: str,
-    ) -> None:
-        """Convert between RDF quad formats (Layer 2, lossless).
-
-        Chains read() -> write(). Both formats must be in the same
-        equivalence class (RDF quads). Named graph information is preserved.
-
-        Args:
-            source: Path to input file.
-            target: Path to output file.
-            input_format: Source format name.
-            output_format: Target format name.
-        """
-        dataset = self.read(source, input_format)
-        self.write(dataset, target, output_format)
-        print(
-            f"Converted {input_format} -> {output_format}: "
-            f"{os.path.basename(target)}"
-        )
-
-
-class TSDHandler:
-    """Handler for tabular structured data formats (Layer 2).
-
-    Uses list[list[str]] as the intermediate representation (IR).
-    Supports: csv, tsv.
-
-    The IR returned by read() can be passed to future mapping classes
-    such as TSDToTripleMapper for Layer 3 conversions.
-    """
-
-    def read(self, source: str, input_format: str) -> list:
-        """Parse a tabular file into a list of rows (IR).
-
-        Each row is a list of string values. First row is the header.
-
-        Args:
-            source: Path to input file.
-            input_format: Source format name ('csv' or 'tsv').
-
-        Returns:
-            list[list[str]] where first element is the header row.
-
-        Raises:
-            ValueError: If input_format is not a recognised tabular format.
-        """
-        if input_format not in TABULAR_FORMATS:
-            raise ValueError(
-                f"'{input_format}' is not a tabular format. "
-                f"Supported: {list(TABULAR_FORMATS)}"
-            )
-        delimiter = TABULAR_FORMATS[input_format]
-        with open(source, "r", newline="", encoding="utf-8") as f:
-            reader = csv.reader(f, delimiter=delimiter)
-            return list(reader)
-
-    def write(self, data: list, target: str, output_format: str) -> None:
-        """Serialize a list of rows (IR) to a tabular file.
-
-        Args:
-            data: list[list[str]] to write.
-            target: Path to output file.
-            output_format: Target format name ('csv' or 'tsv').
-
-        Raises:
-            ValueError: If output_format is not a recognised tabular format.
-        """
-        if output_format not in TABULAR_FORMATS:
-            raise ValueError(
-                f"'{output_format}' is not a tabular format. "
-                f"Supported: {list(TABULAR_FORMATS)}"
-            )
-        delimiter = TABULAR_FORMATS[output_format]
-        with open(target, "w", newline="", encoding="utf-8") as f:
-            writer = csv.writer(f, delimiter=delimiter)
-            writer.writerows(data)
-
-    def convert(
-        self,
-        source: str,
-        target: str,
-        input_format: str,
-        output_format: str,
-    ) -> None:
-        """Convert between tabular formats (Layer 2, lossless).
-
-        Chains read() -> write(). Both formats must be in the same
-        equivalence class (tabular).
-
-        Args:
-            source: Path to input file.
-            target: Path to output file.
-            input_format: Source format name.
-            output_format: Target format name.
-        """
-        rows = self.read(source, input_format)
-        self.write(rows, target, output_format)
-        print(
-            f"Converted {input_format} -> {output_format}: "
-            f"{os.path.basename(target)}"
-        )
-
-
-# ---------------------------------------------------------------------------
-# Layer 3 prototype — RDF triples to CSV (not yet fully implemented)
-# ---------------------------------------------------------------------------
-
-def convert_rdf_to_csv(
-    input_file: str,
-    output_file: str,
+def convert_rdf_triple_format(
+    source: str,
+    target: str,
     input_format: str,
+    output_format: str,
 ) -> None:
-    """Map RDF triples to a wide CSV table (Layer 3 prototype).
-
-    Each unique subject becomes a row. Each unique predicate becomes a column.
-    Multi-valued predicates are pipe-separated.
-    A companion .meta.json file is generated to preserve RDF datatype and
-    language tag information for lossless round trips.
-
-    NOTE: This is a Layer 3 prototype. It is not yet tested and will be
-    properly implemented in the Layer 3 issue.
-
-    Args:
-        input_file: Path to input RDF triples file.
-        output_file: Path to write output CSV file.
-        input_format: Source triple format name (must be in RDF_TRIPLE_FORMATS).
-    """
-    handler = TripleHandler()
-    g = handler.read(input_file, input_format)
-
-    predicates = sorted(set(str(p) for s, p, o in g))
-
-    subjects: dict = {}
-    column_metadata: dict = {}
-
-    for s, p, o in g:
-        subj = str(s)
-        pred = str(p)
-
-        if hasattr(o, "datatype") and o.datatype:
-            column_metadata[pred] = {"datatype": str(o.datatype)}
-        elif hasattr(o, "language") and o.language:
-            column_metadata[pred] = {"language": str(o.language)}
-
-        if subj not in subjects:
-            subjects[subj] = {}
-        if pred not in subjects[subj]:
-            subjects[subj][pred] = []
-        subjects[subj][pred].append(str(o))
-
-    tsd_handler = TSDHandler()
-    rows = [["resource"] + predicates]
-    for subj, pred_map in subjects.items():
-        row = [subj]
-        for pred in predicates:
-            values = pred_map.get(pred, [])
-            row.append("|".join(values))
-        rows.append(row)
-
-    tsd_handler.write(rows, output_file, "csv")
-
-    companion_file = output_file + ".meta.json"
-    with open(companion_file, "w", encoding="utf-8") as f:
-        json.dump({"columns": column_metadata}, f, indent=2)
-
-    print(f"Converted RDF -> CSV: {os.path.basename(output_file)}")
-    print(f"Companion metadata: {os.path.basename(companion_file)}")
+    _triple_handler.convert(source, target, input_format, output_format)
 
 
-# ---------------------------------------------------------------------------
-# Main dispatcher — called from download pipeline
-# ---------------------------------------------------------------------------
-
-# Handler instances — created once, reused
-_triple_handler = TripleHandler()
-_quad_handler = QuadHandler()
-_tsd_handler = TSDHandler()
-
-
-def convert_file(
-    input_file: str,
-    output_file: str,
-    convert_format: str,
+def convert_rdf_quad_format(
+    source: str,
+    target: str,
+    input_format: str,
+    output_format: str,
 ) -> None:
-    """Main conversion dispatcher called from the download pipeline.
-
-    Detects the input format from the file extension, determines whether
-    this is a Layer 2 (within-class) or Layer 3 (cross-class) conversion,
-    and delegates to the appropriate handler.
-
-    Args:
-        input_file: Path to the input file (must be decompressed).
-        output_file: Path to write the converted output file.
-        convert_format: Target format name (CLI format string).
-
-    Raises:
-        ValueError: If input format cannot be detected or conversion
-                    is not supported.
-    """
-    input_format = detect_format_from_filename(input_file)
+    _quad_handler.convert(source, target, input_format, output_format)
 
-    if input_format is None:
-        raise ValueError(
-            f"Could not detect input format from filename: "
-            f"'{os.path.basename(input_file)}'. "
-            f"Supported extensions: {list(EXTENSION_TO_FORMAT.keys())}"
-        )
 
-    if input_format == convert_format:
-        print(
-            f"WARNING: Input and target format are both '{input_format}'. "
-            "Skipping conversion."
-        )
-        return
-
-    input_class = get_format_class(input_format)
-    output_class = get_format_class(convert_format)
-
-    # --- Layer 2: within-class ---
-    if input_class == output_class:
-        if input_class == "triples":
-            _triple_handler.convert(
-                input_file, output_file, input_format, convert_format
-            )
-        elif input_class == "quads":
-            _quad_handler.convert(
-                input_file, output_file, input_format, convert_format
-            )
-        elif input_class == "tabular":
-            _tsd_handler.convert(
-                input_file, output_file, input_format, convert_format
-            )
-        return
-
-    # --- Layer 3: cross-class (prototype only) ---
-    if input_class == "triples" and output_class == "tabular":
-        convert_rdf_to_csv(input_file, output_file, input_format)
-        return
-
-    raise ValueError(
-        f"Conversion from '{input_format}' ({input_class}) to "
-        f"'{convert_format}' ({output_class}) is not yet implemented. "
-        f"Supported Layer 3 conversions: RDF Triples -> CSV/TSV."
-    )
\ No newline at end of file
+def convert_tabular_format(
+    source: str,
+    target: str,
+    input_format: str,
+    output_format: str,
+) -> None:
+    _tsd_handler.convert(source, target, input_format, output_format)
\ No newline at end of file
diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index 7c33fac..414511a 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -5,6 +5,8 @@
 import lzma
 from typing import List, Optional, Tuple
 import re
+import shutil
+import tempfile
 from urllib.parse import urlparse
 
 import requests
@@ -16,7 +18,7 @@
     get_databus_id_parts_from_file_url,
     compute_sha256_and_length,
 )
-from databusclient.api.convert import convert_file, get_converted_filename
+from databusclient.filehandling.format import convert_file, get_converted_filename
 
 # Compression format mappings
 COMPRESSION_EXTENSIONS = {
@@ -508,87 +510,102 @@ def _download_file(
                     f"Checksum mismatch for {filename}: expected {expected_checksum}, got {actual}"
                 )
 
-    # --- 7. Convert compression format if requested (AFTER validation) ---
-    should_convert, source_format = _should_convert_file(file, convert_to, convert_from)
-    final_downloaded_file = filename
-    if should_convert and source_format:
-        target_filename = _get_converted_filename(file, source_format, convert_to)
-        target_filepath = os.path.join(localDir, target_filename)
-        _convert_compression_format(
-            filename, target_filepath, source_format, convert_to
-        )
-        final_downloaded_file = target_filepath
-
-    # --- 8. Convert file format if requested (AFTER compression conversion) ---
-    # Pipeline follows :decompress -> convert format -> recompress
-    # If the source was compressed, the converted output is recompressed:
-    #   - to the format specified by --convert-to if provided
-    #   - to the original compression format otherwise
-    if convert_format:
-        final_basename = os.path.basename(final_downloaded_file)
-        compression_fmt = _detect_compression_format(final_basename)
-
-        if compression_fmt:
-            # File is still compressed — decompress to temp, convert, recompress
-            compression_ext = COMPRESSION_EXTENSIONS[compression_fmt]
-            if final_downloaded_file.lower().endswith(compression_ext):
-                temp_decompressed = final_downloaded_file[:-len(compression_ext)]
-            else:
-                temp_decompressed = final_downloaded_file + ".decompressed"
-
-            try:
-                print(
-                    f"Decompressing {final_basename} before format conversion..."
-                )
-                source_module = COMPRESSION_MODULES[compression_fmt]
-                with source_module.open(final_downloaded_file, "rb") as sf:
-                    with open(temp_decompressed, "wb") as tf:
-                        while True:
-                            chunk = sf.read(8192)
-                            if not chunk:
-                                break
-                            tf.write(chunk)
-
-                # Convert format on the decompressed temp file
-                converted_basename = get_converted_filename(
-                    final_basename, convert_format
-                )
-                converted_filepath = os.path.join(localDir, converted_basename)
-                convert_file(temp_decompressed, converted_filepath, convert_format)
+    # --- 7. Unified compression/format conversion pass ---
+    source_compression = _detect_compression_format(file)
+    should_convert_compression, source_format_for_convert_to = _should_convert_file(
+        file, convert_to, convert_from
+    )
+    needs_format_conversion = convert_format is not None
 
-                # Recompress the converted output.
-                # Use --convert-to format if specified, otherwise use original compression.
-                recompress_fmt = convert_to if convert_to else compression_fmt
-                recompress_ext = COMPRESSION_EXTENSIONS[recompress_fmt]
-                recompressed_filepath = converted_filepath + recompress_ext
-                recompress_module = COMPRESSION_MODULES[recompress_fmt]
+    if not should_convert_compression and not needs_format_conversion:
+        return
 
-                print(
-                    f"Recompressing converted file to {recompress_fmt}: "
-                    f"{os.path.basename(recompressed_filepath)}"
-                )
-                with open(converted_filepath, "rb") as sf:
-                    with recompress_module.open(recompressed_filepath, "wb") as tf:
-                        while True:
-                            chunk = sf.read(8192)
-                            if not chunk:
-                                break
-                            tf.write(chunk)
-
-                # Remove the uncompressed converted file — keep only recompressed
-                if os.path.exists(converted_filepath):
-                    os.remove(converted_filepath)
-
-            finally:
-                # Always clean up temp decompressed file
-                if os.path.exists(temp_decompressed):
-                    os.remove(temp_decompressed)
+    temp_paths: list[str] = []
+    try:
+        # Compression-only path keeps existing conversion message behavior.
+        # Use a temp copy so the original downloaded file remains unchanged.
+        if should_convert_compression and not needs_format_conversion:
+            target_filename = _get_converted_filename(
+                file, source_format_for_convert_to, convert_to
+            )
+            target_filepath = os.path.join(localDir, target_filename)
+
+            with tempfile.NamedTemporaryFile(
+                delete=False,
+                suffix=COMPRESSION_EXTENSIONS[source_format_for_convert_to],
+                dir=localDir,
+            ) as temp_source_copy:
+                source_copy_path = temp_source_copy.name
+            temp_paths.append(source_copy_path)
+
+            shutil.copyfile(filename, source_copy_path)
+            _convert_compression_format(
+                source_copy_path,
+                target_filepath,
+                source_format_for_convert_to,
+                convert_to,
+            )
+            return
 
+        # Determine input for format conversion.
+        # If source is compressed, decompress once to a safe temporary file.
+        conversion_input_path = filename
+        if source_compression is not None:
+            source_ext = COMPRESSION_EXTENSIONS[source_compression]
+            stripped_name = file
+            if stripped_name.lower().endswith(source_ext):
+                stripped_name = stripped_name[: -len(source_ext)]
+            _, format_ext = os.path.splitext(stripped_name)
+
+            with tempfile.NamedTemporaryFile(
+                delete=False,
+                suffix=format_ext,
+                dir=localDir,
+            ) as temp_decompressed:
+                temp_decompressed_path = temp_decompressed.name
+            temp_paths.append(temp_decompressed_path)
+
+            print(f"Decompressing {file}...")
+            with COMPRESSION_MODULES[source_compression].open(filename, "rb") as sf:
+                with open(temp_decompressed_path, "wb") as tf:
+                    shutil.copyfileobj(sf, tf)
+
+            conversion_input_path = temp_decompressed_path
+
+        # Convert format on uncompressed input.
+        converted_basename = get_converted_filename(file, convert_format)
+        converted_uncompressed_path = os.path.join(localDir, converted_basename)
+        convert_file(conversion_input_path, converted_uncompressed_path, convert_format)
+
+        # Recompress converted output when needed.
+        if source_compression is not None:
+            if should_convert_compression and convert_to:
+                final_compression = convert_to
+            else:
+                final_compression = source_compression
+        elif should_convert_compression and convert_to:
+            final_compression = convert_to
         else:
-            # File is already uncompressed — convert directly, no recompression needed
-            converted_filename = get_converted_filename(final_basename, convert_format)
-            converted_filepath = os.path.join(localDir, converted_filename)
-            convert_file(final_downloaded_file, converted_filepath, convert_format)
+            final_compression = None
+
+        if final_compression is not None:
+            recompressed_path = (
+                converted_uncompressed_path + COMPRESSION_EXTENSIONS[final_compression]
+            )
+            print(
+                f"Recompressing {os.path.basename(converted_uncompressed_path)} -> {os.path.basename(recompressed_path)}..."
+            )
+            with open(converted_uncompressed_path, "rb") as sf:
+                with COMPRESSION_MODULES[final_compression].open(
+                    recompressed_path, "wb"
+                ) as tf:
+                    shutil.copyfileobj(sf, tf)
+
+            os.remove(converted_uncompressed_path)
+    finally:
+        for temp_path in temp_paths:
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
 
 
 def _download_files(
diff --git a/databusclient/filehandling/format.py b/databusclient/filehandling/format.py
new file mode 100644
index 0000000..1b625b8
--- /dev/null
+++ b/databusclient/filehandling/format.py
@@ -0,0 +1,511 @@
+"""Format and Mapping Conversion Layer.
+
+This module implements the format conversion pipeline for the Databus Python Client.
+
+Layer 2: Within-class format conversion (lossless).
+    - TripleHandler: RDF triple formats (turtle, ntriples, rdf-xml)
+    - QuadHandler:   RDF quad formats (nquads, trig, trix, json-ld)
+    - TSDHandler:    Tabular formats (csv, tsv)
+
+Each handler provides read() -> IR, write(IR) -> file, convert() -> chains both.
+The IR (intermediate representation) returned by read() is designed to be passed
+to future mapping classes (TripleToQuadMapper, TripleToTSDMapper, etc.).
+"""
+
+import csv
+import os
+import shutil
+import warnings
+from typing import Optional
+
+from rdflib import Dataset, Graph
+
+# Suppress DeprecationWarning and UserWarning raised from rdflib modules.
+# rdflib is mid-migration from ConjunctiveGraph to Dataset in 7.x.
+# These warnings originate from rdflib internals, not our code.
+# Can be removed when rdflib completes their Dataset API migration.
+warnings.filterwarnings("ignore", category=DeprecationWarning, module="rdflib")
+warnings.filterwarnings("ignore", category=UserWarning, module="rdflib")
+
+
+# ---------------------------------------------------------------------------
+# Format registries
+# ---------------------------------------------------------------------------
+
+# Maps CLI format name -> rdflib format string
+RDF_TRIPLE_FORMATS = {
+    "ntriples": "ntriples",
+    "turtle": "turtle",
+    "rdf-xml": "xml",
+}
+
+RDF_QUAD_FORMATS = {
+    "nquads": "nquads",
+    "trig": "trig",
+    "trix": "trix",
+    "json-ld": "json-ld",
+}
+
+TABULAR_FORMATS = {
+    "csv": ",",
+    "tsv": "\t",
+}
+
+ALL_FORMATS = (
+    list(RDF_TRIPLE_FORMATS)
+    + list(RDF_QUAD_FORMATS)
+    + list(TABULAR_FORMATS)
+)
+
+# Maps file extension -> CLI format name
+EXTENSION_TO_FORMAT = {
+    ".ttl": "turtle",
+    ".nt": "ntriples",
+    ".rdf": "rdf-xml",
+    ".xml": "rdf-xml",
+    ".owl": "rdf-xml",
+    ".nq": "nquads",
+    ".trig": "trig",
+    ".trix": "trix",
+    ".jsonld": "json-ld",
+    ".json": "json-ld",
+    ".csv": "csv",
+    ".tsv": "tsv",
+}
+
+# Maps format name -> file extension
+FORMAT_TO_EXTENSION = {
+    "ntriples": ".nt",
+    "turtle": ".ttl",
+    "rdf-xml": ".rdf",
+    "nquads": ".nq",
+    "trig": ".trig",
+    "trix": ".trix",
+    "json-ld": ".jsonld",
+    "csv": ".csv",
+    "tsv": ".tsv",
+}
+
+
+# ---------------------------------------------------------------------------
+# Format detection helpers
+# ---------------------------------------------------------------------------
+
+def detect_format_from_filename(filename: str) -> Optional[str]:
+    """Detect format from file extension, ignoring compression extensions.
+
+    Args:
+        filename: File name or path.
+
+    Returns:
+        Format name string or None if not detectable.
+    """
+    name = filename.lower()
+
+    # strip compression extension first
+    for ext in (".bz2", ".gz", ".xz"):
+        if name.endswith(ext):
+            name = name[: -len(ext)]
+            break
+
+    # longest extension first: the most specific suffix wins if entries ever overlap
+    for ext in sorted(EXTENSION_TO_FORMAT.keys(), key=len, reverse=True):
+        if name.endswith(ext):
+            return EXTENSION_TO_FORMAT[ext]
+
+    return None
+
+
+def get_format_class(fmt: str) -> str:
+    """Return equivalence class for a format name.
+
+    Args:
+        fmt: Format name (e.g. 'turtle', 'nquads', 'csv').
+
+    Returns:
+        'triples', 'quads', or 'tabular'.
+
+    Raises:
+        ValueError: If format is not recognised.
+    """
+    if fmt in RDF_TRIPLE_FORMATS:
+        return "triples"
+    if fmt in RDF_QUAD_FORMATS:
+        return "quads"
+    if fmt in TABULAR_FORMATS:
+        return "tabular"
+    raise ValueError(
+        f"Unknown format: '{fmt}'. Supported formats: {ALL_FORMATS}"
+    )
+
+
+def get_converted_filename(original_filename: str, convert_format: str) -> str:
+    """Generate output filename after format conversion.
+
+    Strips one compression extension if present, then replaces any recognised
+    format extension (including aliases: .xml, .owl, .json) with the target's.
+
+    Args:
+        original_filename: Original file name (basename only, not full path).
+        convert_format: Target format name; unknown names yield ".<name>".
+
+    Returns:
+        New filename with updated extension.
+    """
+    name = original_filename
+
+    # strip compression extension
+    for ext in (".bz2", ".gz", ".xz"):
+        if name.lower().endswith(ext):
+            name = name[: -len(ext)]
+            break
+
+    # strip every extension detect_format_from_filename accepts, not only canonical ones
+    for old_ext in sorted(EXTENSION_TO_FORMAT, key=len, reverse=True):
+        if name.lower().endswith(old_ext):
+            name = name[: -len(old_ext)]
+            break
+
+    target_ext = FORMAT_TO_EXTENSION.get(convert_format, f".{convert_format}")
+    return name + target_ext
+
+
+# ---------------------------------------------------------------------------
+# Layer 2 Handlers
+# ---------------------------------------------------------------------------
+
+class TripleHandler:
+    """Handler for RDF triple formats (Layer 2).
+
+    Uses rdflib.Graph as the intermediate representation (IR).
+    Supports: ntriples, turtle, rdf-xml.
+
+    The IR returned by read() can be passed to future mapping classes
+    such as TripleToQuadMapper or TripleToTSDMapper for Layer 3 conversions.
+    """
+
+    def read(self, source: str, input_format: str) -> Graph:
+        """Parse an RDF triples file into a Graph (IR).
+
+        Args:
+            source: Path to input file.
+            input_format: Source format name (e.g. 'turtle', 'ntriples', 'rdf-xml').
+
+        Returns:
+            rdflib.Graph containing all parsed triples.
+
+        Raises:
+            ValueError: If input_format is not a recognised triple format.
+        """
+        if input_format not in RDF_TRIPLE_FORMATS:
+            raise ValueError(
+                f"'{input_format}' is not a triple format. "
+                f"Supported: {list(RDF_TRIPLE_FORMATS)}"
+            )
+        g = Graph()
+        g.parse(source, format=RDF_TRIPLE_FORMATS[input_format])
+        return g
+
+    def write(self, data: Graph, target: str, output_format: str) -> None:
+        """Serialize a Graph (IR) to a file.
+
+        Args:
+            data: rdflib.Graph to serialize.
+            target: Path to output file.
+            output_format: Target format name (e.g. 'ntriples', 'turtle').
+
+        Raises:
+            ValueError: If output_format is not a recognised triple format.
+        """
+        if output_format not in RDF_TRIPLE_FORMATS:
+            raise ValueError(
+                f"'{output_format}' is not a triple format. "
+                f"Supported: {list(RDF_TRIPLE_FORMATS)}"
+            )
+        parent = os.path.dirname(target)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
+        # Explicitly specify utf-8 encoding to avoid NTSerializer warning
+        data.serialize(
+            destination=target,
+            format=RDF_TRIPLE_FORMATS[output_format],
+            encoding="utf-8",
+        )
+
+    def convert(
+        self,
+        source: str,
+        target: str,
+        input_format: str,
+        output_format: str,
+    ) -> None:
+        """Convert between RDF triple formats (Layer 2, lossless).
+
+        Chains read() -> write(). Both formats must be in the same
+        equivalence class (RDF triples).
+
+        Args:
+            source: Path to input file.
+            target: Path to output file.
+            input_format: Source format name.
+            output_format: Target format name.
+        """
+        graph = self.read(source, input_format)
+        self.write(graph, target, output_format)
+        print(
+            f"Converted {input_format} -> {output_format}: "
+            f"{os.path.basename(target)}"
+        )
+
+
+class QuadHandler:
+    """Handler for RDF quad formats (Layer 2).
+
+    Uses rdflib.Dataset as the intermediate representation (IR).
+    Supports: nquads, trig, trix, json-ld.
+
+    Named graph information is preserved through the Dataset IR.
+    The IR returned by read() can be passed to future mapping classes
+    such as QuadToTripleMapper or QuadToTSDMapper for Layer 3 conversions.
+    """
+
+    def read(self, source: str, input_format: str) -> Dataset:
+        """Parse an RDF quads file into a Dataset (IR).
+
+        Args:
+            source: Path to input file.
+            input_format: Source format name (e.g. 'nquads', 'trig', 'trix', 'json-ld').
+
+        Returns:
+            rdflib.Dataset containing all parsed quads with named graphs.
+
+        Raises:
+            ValueError: If input_format is not a recognised quad format.
+        """
+        if input_format not in RDF_QUAD_FORMATS:
+            raise ValueError(
+                f"'{input_format}' is not a quad format. "
+                f"Supported: {list(RDF_QUAD_FORMATS)}"
+            )
+        d = Dataset()
+        d.parse(source, format=RDF_QUAD_FORMATS[input_format])
+        return d
+
+    def write(self, data: Dataset, target: str, output_format: str) -> None:
+        """Serialize a Dataset (IR) to a file.
+
+        Args:
+            data: rdflib.Dataset to serialize.
+            target: Path to output file.
+            output_format: Target format name.
+
+        Raises:
+            ValueError: If output_format is not a recognised quad format.
+        """
+        if output_format not in RDF_QUAD_FORMATS:
+            raise ValueError(
+                f"'{output_format}' is not a quad format. "
+                f"Supported: {list(RDF_QUAD_FORMATS)}"
+            )
+        parent = os.path.dirname(target)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
+        data.serialize(
+            destination=target,
+            format=RDF_QUAD_FORMATS[output_format],
+        )
+
+    def convert(
+        self,
+        source: str,
+        target: str,
+        input_format: str,
+        output_format: str,
+    ) -> None:
+        """Convert between RDF quad formats (Layer 2, lossless).
+
+        Chains read() -> write(). Both formats must be in the same
+        equivalence class (RDF quads). Named graph information is preserved.
+
+        Args:
+            source: Path to input file.
+            target: Path to output file.
+            input_format: Source format name.
+            output_format: Target format name.
+        """
+        dataset = self.read(source, input_format)
+        self.write(dataset, target, output_format)
+        print(
+            f"Converted {input_format} -> {output_format}: "
+            f"{os.path.basename(target)}"
+        )
+
+
+class TSDHandler:
+    """Handler for tabular structured data formats (Layer 2).
+
+    Uses list[list[str]] as the intermediate representation (IR).
+    Supports: csv, tsv.
+
+    The IR returned by read() can be passed to future mapping classes
+    such as TSDToTripleMapper for Layer 3 conversions.
+    """
+
+    def read(self, source: str, input_format: str) -> list:
+        """Parse a tabular file into a list of rows (IR).
+
+        Each row is a list of string values. First row is the header.
+
+        Args:
+            source: Path to input file.
+            input_format: Source format name ('csv' or 'tsv').
+
+        Returns:
+            list[list[str]] where first element is the header row.
+
+        Raises:
+            ValueError: If input_format is not a recognised tabular format.
+        """
+        if input_format not in TABULAR_FORMATS:
+            raise ValueError(
+                f"'{input_format}' is not a tabular format. "
+                f"Supported: {list(TABULAR_FORMATS)}"
+            )
+        delimiter = TABULAR_FORMATS[input_format]
+        with open(source, "r", newline="", encoding="utf-8") as f:
+            reader = csv.reader(f, delimiter=delimiter)
+            return list(reader)
+
+    def write(self, data: list, target: str, output_format: str) -> None:
+        """Serialize a list of rows (IR) to a tabular file.
+
+        Args:
+            data: list[list[str]] to write.
+            target: Path to output file.
+            output_format: Target format name ('csv' or 'tsv').
+
+        Raises:
+            ValueError: If output_format is not a recognised tabular format.
+        """
+        if output_format not in TABULAR_FORMATS:
+            raise ValueError(
+                f"'{output_format}' is not a tabular format. "
+                f"Supported: {list(TABULAR_FORMATS)}"
+            )
+        parent = os.path.dirname(target)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
+        delimiter = TABULAR_FORMATS[output_format]
+        with open(target, "w", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f, delimiter=delimiter)
+            writer.writerows(data)
+
+    def convert(
+        self,
+        source: str,
+        target: str,
+        input_format: str,
+        output_format: str,
+    ) -> None:
+        """Convert between tabular formats (Layer 2, lossless).
+
+        Chains read() -> write(). Both formats must be in the same
+        equivalence class (tabular).
+
+        Args:
+            source: Path to input file.
+            target: Path to output file.
+            input_format: Source format name.
+            output_format: Target format name.
+        """
+        rows = self.read(source, input_format)
+        self.write(rows, target, output_format)
+        print(
+            f"Converted {input_format} -> {output_format}: "
+            f"{os.path.basename(target)}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Main dispatcher — called from download pipeline
+# ---------------------------------------------------------------------------
+
+# Handler instances — created once, reused
+_triple_handler = TripleHandler()
+_quad_handler = QuadHandler()
+_tsd_handler = TSDHandler()
+
+
+def convert_file(
+    input_file: str,
+    output_file: str,
+    convert_format: str,
+) -> None:
+    """Main conversion dispatcher called from the download pipeline.
+
+    Detects the input format from the file extension, determines whether
+    this is a Layer 2 (within-class) or Layer 3 (cross-class) conversion,
+    and delegates to the appropriate handler.
+
+    Args:
+        input_file: Path to the input file (must be decompressed).
+        output_file: Path to write the converted output file.
+        convert_format: Target format name (CLI format string).
+
+    Raises:
+        ValueError: If input format cannot be detected or conversion
+                    is not supported.
+    """
+    input_format = detect_format_from_filename(input_file)
+
+    if input_format is None:
+        raise ValueError(
+            f"Could not detect input format from filename: "
+            f"'{os.path.basename(input_file)}'. "
+            f"Supported extensions: {list(EXTENSION_TO_FORMAT.keys())}"
+        )
+
+    if input_format == convert_format:
+        # Input and target format are identical.
+        # Copy input to output path so the caller always receives an output file.
+        # This is important for the download pipeline which expects an output
+        # file to exist after convert_file() returns — e.g. for recompression.
+        if input_file != output_file:
+            shutil.copy2(input_file, output_file)
+            print(
+                f"Input and target format are both '{input_format}'. "
+                f"Copied to output path: {os.path.basename(output_file)}"
+            )
+        return
+
+    input_class = get_format_class(input_format)
+    output_class = get_format_class(convert_format)
+
+    # --- Layer 2: within-class ---
+    if input_class == output_class:
+        if input_class == "triples":
+            _triple_handler.convert(
+                input_file, output_file, input_format, convert_format
+            )
+        elif input_class == "quads":
+            _quad_handler.convert(
+                input_file, output_file, input_format, convert_format
+            )
+        elif input_class == "tabular":
+            _tsd_handler.convert(
+                input_file, output_file, input_format, convert_format
+            )
+        return
+
+    # --- Layer 3: cross-class (prototype only) ---
+    if input_class == "triples" and output_class == "tabular":
+        from databusclient.filehandling.mapping import convert_rdf_to_csv
+
+        convert_rdf_to_csv(input_file, output_file, input_format)
+        return
+
+    raise ValueError(
+        f"Conversion from '{input_format}' ({input_class}) to "
+        f"'{convert_format}' ({output_class}) is not yet implemented. "
+        f"Supported Layer 3 conversions: RDF Triples -> CSV/TSV."
+    )
diff --git a/databusclient/filehandling/mapping.py b/databusclient/filehandling/mapping.py
new file mode 100644
index 0000000..93b5a00
--- /dev/null
+++ b/databusclient/filehandling/mapping.py
@@ -0,0 +1,68 @@
+"""Layer 3 prototype mapping handlers."""
+
+import json
+import os
+
+from databusclient.filehandling.format import TSDHandler, TripleHandler
+
+
+def convert_rdf_to_csv(
+    input_file: str,
+    output_file: str,
+    input_format: str,
+) -> None:
+    """Map RDF triples to a wide CSV table (Layer 3 prototype).
+
+    Each unique subject becomes a row. Each unique predicate becomes a column.
+    Multi-valued predicates are pipe-separated.
+    A companion .meta.json file is generated to preserve RDF datatype and
+    language tag information for lossless round trips.
+
+    NOTE: This is a Layer 3 prototype. It is not yet tested and will be
+    properly implemented in the Layer 3 issue.
+
+    Args:
+        input_file: Path to input RDF triples file.
+        output_file: Path to write output CSV file.
+        input_format: Source triple format name (must be in RDF_TRIPLE_FORMATS).
+    """
+    handler = TripleHandler()
+    g = handler.read(input_file, input_format)
+
+    predicates = sorted(set(str(p) for s, p, o in g))
+
+    subjects: dict = {}
+    column_metadata: dict = {}
+
+    for s, p, o in g:
+        subj = str(s)
+        pred = str(p)
+
+        if hasattr(o, "datatype") and o.datatype:
+            column_metadata[pred] = {"datatype": str(o.datatype)}
+        elif hasattr(o, "language") and o.language:
+            column_metadata[pred] = {"language": str(o.language)}
+
+        if subj not in subjects:
+            subjects[subj] = {}
+        if pred not in subjects[subj]:
+            subjects[subj][pred] = []
+        subjects[subj][pred].append(str(o))
+
+    tsd_handler = TSDHandler()
+    rows = [["resource"] + predicates]
+    for subj, pred_map in subjects.items():
+        row = [subj]
+        for pred in predicates:
+            values = pred_map.get(pred, [])
+            row.append("|".join(values))
+        rows.append(row)
+
+    tsd_handler.write(rows, output_file, "csv")
+
+    companion_file = output_file + ".meta.json"
+    with open(companion_file, "w", encoding="utf-8") as f:
+        json.dump({"columns": column_metadata}, f, indent=2)
+
+    print(f"Converted RDF -> CSV: {os.path.basename(output_file)}")
+    print(f"Companion metadata: {os.path.basename(companion_file)}")
diff --git a/pyproject.toml b/pyproject.toml
index 72179cc..9759c07 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,9 @@ databusclient = "databusclient.cli:app"
 target-version = "py311"
 src = ["databusclient", "tests"]
 
+[tool.ruff.lint.per-file-ignores]
+"tests/test_format_round_trips.py" = ["F841"]
+
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
diff --git a/run_all_conversion_tests.py b/run_all_conversion_tests.py
index 384e052..98f9bbb 100644
--- a/run_all_conversion_tests.py
+++ b/run_all_conversion_tests.py
@@ -5,6 +5,10 @@
 Test file for testing with real datasets from databus.
 """
 
+# TODO: This script is a temporary manual integration test artifact.
+# It must be removed or rewritten as proper pytest integration tests
+# before the final PR. Do not commit this file to the upstream repo.
+
 import os
 from databusclient.api.convert import (
     convert_rdf_triple_format,

From 180c255ed3961f454d8af1b71bdff03101d8284f Mon Sep 17 00:00:00 2001
From: DhanashreePetare <dhanashreepetare8125@gmail.com>
Date: Fri, 12 Jun 2026 17:12:11 +0530
Subject: [PATCH 4/7] Review comments resolved under issue #59

---
 databusclient/api/download.py        |  7 ++++
 databusclient/cli.py                 | 18 ++++++++--
 databusclient/filehandling/format.py | 51 ++++++++++++++++++++++++++--
 3 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index 414511a..d7ec030 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -577,6 +577,13 @@ def _download_file(
         converted_uncompressed_path = os.path.join(localDir, converted_basename)
         convert_file(conversion_input_path, converted_uncompressed_path, convert_format)
 
+        # Delete the original downloaded file after successful format conversion,
+        # unless the converted output is the same file (same format, same path).
+        if os.path.abspath(filename) != os.path.abspath(converted_uncompressed_path):
+            if os.path.exists(filename):
+                os.remove(filename)
+                print(f"Removed original file: {os.path.basename(filename)}")
+
         # Recompress converted output when needed.
         if source_compression is not None:
             if should_convert_compression and convert_to:
diff --git a/databusclient/cli.py b/databusclient/cli.py
index c687616..e998d4e 100644
--- a/databusclient/cli.py
+++ b/databusclient/cli.py
@@ -190,13 +190,25 @@ def deploy(
     help="Source compression format to convert from (optional filter). Only files with this compression will be converted.",
 )
 @click.option(
-    "--convert-format",
+    "--format",
     "convert_format",
     type=click.Choice(
-        ["ntriples","turtle","rdf-xml","nquads","trig","trix","json-ld","csv","tsv"],
+        [
+            "ntriples", "nt",
+            "turtle", "ttl",
+            "rdf-xml", "rdf", "xml",
+            "nquads", "nq",
+            "trig",
+            "trix",
+            "json-ld", "jsonld",
+            "csv",
+            "tsv",
+        ],
         case_sensitive=False,
     ),
-    help="Target format for on-the-fly format conversion during download (Layer 2 and Layer 3).",
+    help="Target format for on-the-fly format conversion during download (Layer 2 and Layer 3). "
+         "Accepts full names (ntriples, turtle, rdf-xml, nquads, trig, trix, json-ld, csv, tsv) "
+         "or short aliases (nt, ttl, rdf, xml, nq, jsonld).",
 )
 @click.option(
     "--validate-checksum", is_flag=True, help="Validate checksums of downloaded files"
diff --git a/databusclient/filehandling/format.py b/databusclient/filehandling/format.py
index 1b625b8..7c40109 100644
--- a/databusclient/filehandling/format.py
+++ b/databusclient/filehandling/format.py
@@ -57,6 +57,42 @@
     + list(TABULAR_FORMATS)
 )
 
+# Maps short CLI aliases -> canonical format name
+FORMAT_ALIASES = {
+    "nt": "ntriples",
+    "ttl": "turtle",
+    "rdf": "rdf-xml",
+    "xml": "rdf-xml",
+    "nq": "nquads",
+    "jsonld": "json-ld",
+}
+
+def normalize_format(fmt: str) -> str:
+    """Normalize a format name or alias to its canonical form.
+
+    Accepts both full names (e.g. 'ntriples') and short aliases (e.g. 'nt').
+    Canonical names pass through unchanged. Unknown values raise ValueError.
+
+    Args:
+        fmt: Format name or alias string (case-insensitive).
+
+    Returns:
+        Canonical format name string.
+
+    Raises:
+        ValueError: If fmt is not a recognised format name or alias.
+    """
+    fmt_lower = fmt.lower()
+    # Resolve alias first
+    canonical = FORMAT_ALIASES.get(fmt_lower, fmt_lower)
+    if canonical not in ALL_FORMATS:
+        raise ValueError(
+            f"Unknown format: '{fmt}'. "
+            f"Supported formats: {ALL_FORMATS}. "
+            f"Supported aliases: {list(FORMAT_ALIASES.keys())}"
+        )
+    return canonical
+
 # Maps file extension -> CLI format name
 EXTENSION_TO_FORMAT = {
     ".ttl": "turtle",
@@ -143,15 +179,18 @@ def get_converted_filename(original_filename: str, convert_format: str) -> str:
     """Generate output filename after format conversion.
 
     Strips compression extension if present, then replaces the format
-    extension with the target format extension.
+    extension with the target format extension. Accepts format aliases.
 
     Args:
         original_filename: Original file name (basename only, not full path).
-        convert_format: Target format name.
+        convert_format: Target format name or alias.
 
     Returns:
         New filename with updated extension.
     """
+    # Normalize alias to canonical name
+    convert_format = normalize_format(convert_format)
+
     name = original_filename
 
     # strip compression extension
@@ -447,15 +486,21 @@ def convert_file(
     this is a Layer 2 (within-class) or Layer 3 (cross-class) conversion,
     and delegates to the appropriate handler.
 
+    Accepts both canonical format names and short aliases (e.g. 'nt' for
+    'ntriples', 'ttl' for 'turtle'). See normalize_format() for full list.
+
     Args:
         input_file: Path to the input file (must be decompressed).
         output_file: Path to write the converted output file.
-        convert_format: Target format name (CLI format string).
+        convert_format: Target format name or alias (CLI format string).
 
     Raises:
         ValueError: If input format cannot be detected or conversion
                     is not supported.
     """
+    # Normalize alias to canonical name before any processing
+    convert_format = normalize_format(convert_format)
+
     input_format = detect_format_from_filename(input_file)
 
     if input_format is None:

From 34039f23a616e2e9df2e3fa4e66b8888fc47a06e Mon Sep 17 00:00:00 2001
From: DhanashreePetare <dhanashreepetare8125@gmail.com>
Date: Sat, 13 Jun 2026 18:36:26 +0530
Subject: [PATCH 5/7] #61: replacing --convert-from & --convert-to with
 --compression

---
 databusclient/api/download.py        | 123 +++++++++++----------------
 databusclient/cli.py                 |  18 ++--
 tests/test_compression_conversion.py |  41 +++++----
 3 files changed, 79 insertions(+), 103 deletions(-)

diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index d7ec030..56cf07c 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -50,20 +50,23 @@ def _detect_compression_format(filename: str) -> Optional[str]:
     return None
 
 
-def _should_convert_file(
-    filename: str, convert_to: Optional[str], convert_from: Optional[str]
+def _should_convert_compression(
+    filename: str, compression: Optional[str]
 ) -> Tuple[bool, Optional[str]]:
-    """Determine if a file should be converted and what the source format is.
+    """Determine if a file should have its compression format converted.
+
+    Source compression is detected automatically from the file extension.
+    All compressed files will be converted to the target format regardless
+    of their source compression format.
 
     Args:
         filename: Name of the file.
-        convert_to: Target compression format ('bz2', 'gz', 'xz').
-        convert_from: Optional source compression format filter.
+        compression: Target compression format ('bz2', 'gz', 'xz') or None.
 
     Returns:
         Tuple of (should_convert: bool, source_format: Optional[str]).
     """
-    if not convert_to:
+    if not compression:
         return False, None
 
     source_format = _detect_compression_format(filename)
@@ -73,11 +76,7 @@ def _should_convert_file(
         return False, None
 
     # If source and target are the same, skip conversion
-    if source_format == convert_to:
-        return False, None
-
-    # If convert_from is specified, only convert matching formats
-    if convert_from and source_format != convert_from:
+    if source_format == compression:
         return False, None
 
     return True, source_format
@@ -314,8 +313,7 @@ def _download_file(
     databus_key=None,
     auth_url=None,
     client_id=None,
-    convert_to=None,
-    convert_from=None,
+    compression=None,
     convert_format=None,
     validate_checksum: bool = False,
     expected_checksum: str | None = None,
@@ -329,8 +327,8 @@ def _download_file(
         databus_key: Databus API key for protected downloads.
         auth_url: Keycloak token endpoint URL.
         client_id: Client ID for token exchange.
-        convert_to: Target compression format for on-the-fly conversion.
-        convert_from: Optional source compression format filter.
+        compression: Target compression format for on-the-fly conversion.
+                     Source compression is auto-detected from the file extension.
         convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
         expected_checksum: The expected checksum of the file.
@@ -354,6 +352,7 @@ def _download_file(
     dirpath = os.path.dirname(filename)
     if dirpath:
         os.makedirs(dirpath, exist_ok=True)  # Create the necessary directories
+
     # --- 1. Get redirect URL by requesting HEAD ---
     headers = {}
 
@@ -512,8 +511,8 @@ def _download_file(
 
     # --- 7. Unified compression/format conversion pass ---
     source_compression = _detect_compression_format(file)
-    should_convert_compression, source_format_for_convert_to = _should_convert_file(
-        file, convert_to, convert_from
+    should_convert_compression, source_fmt = _should_convert_compression(
+        file, compression
     )
     needs_format_conversion = convert_format is not None
 
@@ -525,14 +524,12 @@ def _download_file(
         # Compression-only path keeps existing conversion message behavior.
         # Use a temp copy so the original downloaded file remains unchanged.
         if should_convert_compression and not needs_format_conversion:
-            target_filename = _get_converted_filename(
-                file, source_format_for_convert_to, convert_to
-            )
+            target_filename = _get_converted_filename(file, source_fmt, compression)
             target_filepath = os.path.join(localDir, target_filename)
 
             with tempfile.NamedTemporaryFile(
                 delete=False,
-                suffix=COMPRESSION_EXTENSIONS[source_format_for_convert_to],
+                suffix=COMPRESSION_EXTENSIONS[source_fmt],
                 dir=localDir,
             ) as temp_source_copy:
                 source_copy_path = temp_source_copy.name
@@ -542,8 +539,8 @@ def _download_file(
             _convert_compression_format(
                 source_copy_path,
                 target_filepath,
-                source_format_for_convert_to,
-                convert_to,
+                source_fmt,
+                compression,
             )
             return
 
@@ -586,12 +583,12 @@ def _download_file(
 
         # Recompress converted output when needed.
         if source_compression is not None:
-            if should_convert_compression and convert_to:
-                final_compression = convert_to
+            if should_convert_compression and compression:
+                final_compression = compression
             else:
                 final_compression = source_compression
-        elif should_convert_compression and convert_to:
-            final_compression = convert_to
+        elif should_convert_compression and compression:
+            final_compression = compression
         else:
             final_compression = None
 
@@ -622,8 +619,7 @@ def _download_files(
     databus_key: str = None,
     auth_url: str = None,
     client_id: str = None,
-    convert_to: str = None,
-    convert_from: str = None,
+    compression: str = None,
     convert_format: str = None,
     validate_checksum: bool = False,
     checksums: dict | None = None,
@@ -637,8 +633,7 @@ def _download_files(
         databus_key: Databus API key for protected downloads.
         auth_url: Keycloak token endpoint URL.
         client_id: Client ID for token exchange.
-        convert_to: Target compression format for on-the-fly conversion.
-        convert_from: Optional source compression format filter.
+        compression: Target compression format for on-the-fly conversion.
         convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
         checksums: Dictionary mapping URLs to their expected checksums.
@@ -654,8 +649,7 @@ def _download_files(
             databus_key=databus_key,
             auth_url=auth_url,
             client_id=client_id,
-            convert_to=convert_to,
-            convert_from=convert_from,
+            compression=compression,
             convert_format=convert_format,
             validate_checksum=validate_checksum,
             expected_checksum=expected,
@@ -803,8 +797,7 @@ def _download_collection(
     databus_key: str = None,
     auth_url: str = None,
     client_id: str = None,
-    convert_to: str = None,
-    convert_from: str = None,
+    compression: str = None,
     convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
@@ -818,8 +811,7 @@ def _download_collection(
         databus_key: Databus API key for protected downloads.
         auth_url: Keycloak token endpoint URL.
         client_id: Client ID for token exchange.
-        convert_to: Target compression format for on-the-fly conversion.
-        convert_from: Optional source compression format filter.
+        compression: Target compression format for on-the-fly conversion.
         convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
@@ -840,8 +832,7 @@ def _download_collection(
         databus_key=databus_key,
         auth_url=auth_url,
         client_id=client_id,
-        convert_to=convert_to,
-        convert_from=convert_from,
+        compression=compression,
         convert_format=convert_format,
         validate_checksum=validate_checksum,
         checksums=checksums if checksums else None,
@@ -855,8 +846,7 @@ def _download_version(
     databus_key: str = None,
     auth_url: str = None,
     client_id: str = None,
-    convert_to: str = None,
-    convert_from: str = None,
+    compression: str = None,
     convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
@@ -869,8 +859,7 @@ def _download_version(
         databus_key: Databus API key for protected downloads.
         auth_url: Keycloak token endpoint URL.
         client_id: Client ID for token exchange.
-        convert_to: Target compression format for on-the-fly conversion.
-        convert_from: Optional source compression format filter.
+        compression: Target compression format for on-the-fly conversion.
         convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
@@ -890,8 +879,7 @@ def _download_version(
         databus_key=databus_key,
         auth_url=auth_url,
         client_id=client_id,
-        convert_to=convert_to,
-        convert_from=convert_from,
+        compression=compression,
         convert_format=convert_format,
         validate_checksum=validate_checksum,
         checksums=checksums,
@@ -906,8 +894,7 @@ def _download_artifact(
     databus_key: str = None,
     auth_url: str = None,
     client_id: str = None,
-    convert_to: str = None,
-    convert_from: str = None,
+    compression: str = None,
     convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
@@ -921,8 +908,7 @@ def _download_artifact(
         databus_key: Databus API key for protected downloads.
         auth_url: Keycloak token endpoint URL.
         client_id: Client ID for token exchange.
-        convert_to: Target compression format for on-the-fly conversion.
-        convert_from: Optional source compression format filter.
+        compression: Target compression format for on-the-fly conversion.
         convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
@@ -948,8 +934,7 @@ def _download_artifact(
             databus_key=databus_key,
             auth_url=auth_url,
             client_id=client_id,
-            convert_to=convert_to,
-            convert_from=convert_from,
+            compression=compression,
             convert_format=convert_format,
             validate_checksum=validate_checksum,
             checksums=checksums,
@@ -1025,8 +1010,7 @@ def _download_group(
     databus_key: str = None,
     auth_url: str = None,
     client_id: str = None,
-    convert_to: str = None,
-    convert_from: str = None,
+    compression: str = None,
     convert_format: str = None,
     validate_checksum: bool = False,
 ) -> None:
@@ -1040,8 +1024,7 @@ def _download_group(
         databus_key: Databus API key for protected downloads.
         auth_url: Keycloak token endpoint URL.
         client_id: Client ID for token exchange.
-        convert_to: Target compression format for on-the-fly conversion.
-        convert_from: Optional source compression format filter.
+        compression: Target compression format for on-the-fly conversion.
         convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
@@ -1057,8 +1040,7 @@ def _download_group(
             databus_key=databus_key,
             auth_url=auth_url,
             client_id=client_id,
-            convert_to=convert_to,
-            convert_from=convert_from,
+            compression=compression,
             convert_format=convert_format,
             validate_checksum=validate_checksum,
         )
@@ -1107,8 +1089,7 @@ def download(
     all_versions=None,
     auth_url="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token",
     client_id="vault-token-exchange",
-    convert_to=None,
-    convert_from=None,
+    compression=None,
     convert_format=None,
     validate_checksum: bool = False,
 ) -> None:
@@ -1124,8 +1105,8 @@ def download(
         databus_key: Databus API key for protected downloads.
         auth_url: Keycloak token endpoint URL. Default is "https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token".
         client_id: Client ID for token exchange. Default is "vault-token-exchange".
-        convert_to: Target compression format for on-the-fly conversion (supported: bz2, gz, xz).
-        convert_from: Optional source compression format filter.
+        compression: Target compression format for on-the-fly conversion (supported: bz2, gz, xz).
+                     Source compression is auto-detected from the file extension.
         convert_format: Target RDF/tabular format for on-the-fly conversion.
         validate_checksum: Whether to validate checksums after downloading.
     """
@@ -1154,8 +1135,7 @@ def download(
                     databus_key,
                     auth_url,
                     client_id,
-                    convert_to,
-                    convert_from,
+                    compression,
                     convert_format,
                     validate_checksum=validate_checksum,
                 )
@@ -1176,8 +1156,7 @@ def download(
                     databus_key=databus_key,
                     auth_url=auth_url,
                     client_id=client_id,
-                    convert_to=convert_to,
-                    convert_from=convert_from,
+                    compression=compression,
                     convert_format=convert_format,
                     validate_checksum=validate_checksum,
                     expected_checksum=expected,
@@ -1191,8 +1170,7 @@ def download(
                     databus_key=databus_key,
                     auth_url=auth_url,
                     client_id=client_id,
-                    convert_to=convert_to,
-                    convert_from=convert_from,
+                    compression=compression,
                     convert_format=convert_format,
                     validate_checksum=validate_checksum,
                 )
@@ -1208,8 +1186,7 @@ def download(
                     databus_key=databus_key,
                     auth_url=auth_url,
                     client_id=client_id,
-                    convert_to=convert_to,
-                    convert_from=convert_from,
+                    compression=compression,
                     convert_format=convert_format,
                     validate_checksum=validate_checksum,
                 )
@@ -1225,8 +1202,7 @@ def download(
                     databus_key=databus_key,
                     auth_url=auth_url,
                     client_id=client_id,
-                    convert_to=convert_to,
-                    convert_from=convert_from,
+                    compression=compression,
                     convert_format=convert_format,
                     validate_checksum=validate_checksum,
                 )
@@ -1264,9 +1240,8 @@ def download(
                 databus_key=databus_key,
                 auth_url=auth_url,
                 client_id=client_id,
-                convert_to=convert_to,
-                convert_from=convert_from,
+                compression=compression,
                 convert_format=convert_format,
                 validate_checksum=validate_checksum,
                 checksums=checksums if checksums else None,
-            )
+            )
\ No newline at end of file
diff --git a/databusclient/cli.py b/databusclient/cli.py
index e998d4e..50f0766 100644
--- a/databusclient/cli.py
+++ b/databusclient/cli.py
@@ -180,14 +180,12 @@ def deploy(
     help="Client ID for token exchange",
 )
 @click.option(
-    "--convert-to",
+    "--compression",
+    "compression",
     type=click.Choice(["bz2", "gz", "xz"], case_sensitive=False),
-    help="Target compression format for on-the-fly conversion during download (supported: bz2, gz, xz)",
-)
-@click.option(
-    "--convert-from",
-    type=click.Choice(["bz2", "gz", "xz"], case_sensitive=False),
-    help="Source compression format to convert from (optional filter). Only files with this compression will be converted.",
+    help="Target compression format for on-the-fly conversion during download. "
+         "Source compression is detected automatically from the file extension. "
+         "All compressed files will be converted to the target format (bz2, gz, xz).",
 )
 @click.option(
     "--format",
@@ -222,8 +220,7 @@ def download(
     all_versions,
     authurl,
     clientid,
-    convert_to,
-    convert_from,
+    compression,
     convert_format,
     validate_checksum,
 ):
@@ -241,8 +238,7 @@ def download(
             all_versions=all_versions,
             auth_url=authurl,
             client_id=clientid,
-            convert_to=convert_to,
-            convert_from=convert_from,
+            compression=compression,
             convert_format=convert_format,
             validate_checksum=validate_checksum,
         )
diff --git a/tests/test_compression_conversion.py b/tests/test_compression_conversion.py
index 71ada16..8effa1b 100644
--- a/tests/test_compression_conversion.py
+++ b/tests/test_compression_conversion.py
@@ -8,7 +8,7 @@
 import pytest
 from databusclient.api.download import (
     _detect_compression_format,
-    _should_convert_file,
+    _should_convert_compression,
     _get_converted_filename,
     _convert_compression_format,
 )
@@ -23,37 +23,42 @@ def test_detect_compression_format():
     assert _detect_compression_format("FILE.TXT.GZ") == "gz"  # case insensitive
 
 
-def test_should_convert_file():
-    """Test file conversion decision logic"""
+def test_should_convert_compression():
+    """Test file compression conversion decision logic.
+
+    With --compression, source format is auto-detected from the file extension.
+    All compressed files are converted to the target format regardless of their
+    source compression format (no convert_from filter).
+    """
     # No conversion target specified
-    should_convert, source = _should_convert_file("file.txt.bz2", None, None)
+    should_convert, source = _should_convert_compression("file.txt.bz2", None)
     assert should_convert is False
     assert source is None
 
-    # Uncompressed file
-    should_convert, source = _should_convert_file("file.txt", "gz", None)
+    # Uncompressed file — never converted
+    should_convert, source = _should_convert_compression("file.txt", "gz")
     assert should_convert is False
     assert source is None
 
-    # Same source and target
-    should_convert, source = _should_convert_file("file.txt.gz", "gz", None)
+    # Same source and target — skip (no-op)
+    should_convert, source = _should_convert_compression("file.txt.gz", "gz")
     assert should_convert is False
     assert source is None
 
-    # Valid conversion
-    should_convert, source = _should_convert_file("file.txt.bz2", "gz", None)
+    # bz2 -> gz: should convert, source auto-detected
+    should_convert, source = _should_convert_compression("file.txt.bz2", "gz")
     assert should_convert is True
     assert source == "bz2"
 
-    # With convert_from filter matching
-    should_convert, source = _should_convert_file("file.txt.bz2", "gz", "bz2")
+    # xz -> gz: should convert regardless of source format (no filter)
+    should_convert, source = _should_convert_compression("file.txt.xz", "gz")
     assert should_convert is True
-    assert source == "bz2"
+    assert source == "xz"
 
-    # With convert_from filter not matching
-    should_convert, source = _should_convert_file("file.txt.bz2", "gz", "xz")
-    assert should_convert is False
-    assert source is None
+    # gz -> bz2: should convert
+    should_convert, source = _should_convert_compression("file.txt.gz", "bz2")
+    assert should_convert is True
+    assert source == "gz"
 
 
 def test_get_converted_filename():
@@ -195,4 +200,4 @@ def test_corrupted_file_handling():
 
 
 if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
+    pytest.main([__file__, "-v"])
\ No newline at end of file

From cd5e990efe53b62aa744bcd110019e68284b285c Mon Sep 17 00:00:00 2001
From: DhanashreePetare <dhanashreepetare8125@gmail.com>
Date: Mon, 15 Jun 2026 01:06:58 +0530
Subject: [PATCH 6/7] gsoc26: layer2 complete + implementation for #61

---
 databusclient/api/download.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index 56cf07c..41b1f2a 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -521,23 +521,14 @@ def _download_file(
 
     temp_paths: list[str] = []
     try:
-        # Compression-only path keeps existing conversion message behavior.
-        # Use a temp copy so the original downloaded file remains unchanged.
+        # Compression-only path: convert directly from the downloaded file.
+        # _convert_compression_format deletes the source after success,
+        # so the original downloaded file is removed automatically.
         if should_convert_compression and not needs_format_conversion:
             target_filename = _get_converted_filename(file, source_fmt, compression)
             target_filepath = os.path.join(localDir, target_filename)
-
-            with tempfile.NamedTemporaryFile(
-                delete=False,
-                suffix=COMPRESSION_EXTENSIONS[source_fmt],
-                dir=localDir,
-            ) as temp_source_copy:
-                source_copy_path = temp_source_copy.name
-            temp_paths.append(source_copy_path)
-
-            shutil.copyfile(filename, source_copy_path)
             _convert_compression_format(
-                source_copy_path,
+                filename,
                 target_filepath,
                 source_fmt,
                 compression,

From 549ea3bcc0f226eef39ba8123eb4410337a5b90e Mon Sep 17 00:00:00 2001
From: DhanashreePetare <dhanashreepetare8125@gmail.com>
Date: Wed, 17 Jun 2026 01:54:43 +0530
Subject: [PATCH 7/7] docs: update README for --format and --compression flags

---
 README.md | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 354c732..2e55ae9 100644
--- a/README.md
+++ b/README.md
@@ -174,10 +174,10 @@ docker run --rm -v $(pwd):/data dbpedia/databus-python-client download $DOWNLOAD
   Note: Vault tokens are only required for certain protected Databus hosts (for example: `data.dbpedia.io`, `data.dev.dbpedia.link`). The client now detects those hosts and will fail early with a clear message if a token is required but not provided. Do not pass `--vault-token` for public downloads.
 - `--databus-key`
   - If the databus is protected and needs API key authentication, you can provide the API key with `--databus-key YOUR_API_KEY`.
-- `--convert-to`
-  - Enables on-the-fly compression format conversion during download. Supported formats: `bz2`, `gz`, `xz`. Downloaded files will be automatically decompressed and recompressed to the target format. Example: `--convert-to gz` converts all downloaded compressed files to gzip format.
-- `--convert-from`
-  - Optional filter to specify which source compression format should be converted. Use with `--convert-to` to convert only files with a specific compression format. Example: `--convert-to gz --convert-from bz2` converts only `.bz2` files to `.gz`, leaving other formats unchanged.
+- `--compression`
+  - Enables on-the-fly compression format conversion during download. Supported formats: `bz2`, `gz`, `xz`. The source compression is auto-detected from the file extension. Example: `--compression gz` converts all downloaded compressed files to gzip format.
+- `--format`
+  - Enables on-the-fly RDF and tabular format conversion during download (Layer 2). Supported formats: `ntriples` (`nt`), `turtle` (`ttl`), `rdf-xml` (`rdf`, `xml`), `nquads` (`nq`), `trig`, `trix`, `json-ld` (`jsonld`), `csv`, `tsv`. Short aliases shown in brackets. Only the converted output file is kept — the original is deleted after successful conversion. Example: `--format turtle` converts all downloaded RDF triple files to Turtle format.
 - `--validate-checksum`
   - Validates the checksums of downloaded files against the checksums provided by the Databus. If a checksum does not match, an error is raised and the file is deleted.
 
@@ -272,16 +272,28 @@ databusclient download 'PREFIX dcat: <http://www.w3.org/ns/dcat#> SELECT ?x WHER
 docker run --rm -v $(pwd):/data dbpedia/databus-python-client download 'PREFIX dcat: <http://www.w3.org/ns/dcat#> SELECT ?x WHERE { ?sub dcat:downloadURL ?x . } LIMIT 10' --databus https://databus.dbpedia.org/sparql
 ```
 
-**Download with Compression Conversion**: download files and convert them to a different compression format on-the-fly
+**Download with Compression Conversion**: download files and convert their compression format on-the-fly. Source compression is auto-detected from the file extension.
 ```bash
 # Convert all compressed files to gzip format
-databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01 --convert-to gz
-
-# Convert only bz2 files to xz format, leaving other compressions unchanged
-databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals --convert-to xz --convert-from bz2
+databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01 --compression gz
 
 # Download a collection and unify all files to bz2 format
-databusclient download https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12 --convert-to bz2
+databusclient download https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12 --compression bz2
+```
+
+**Download with Format Conversion**: download files and convert their RDF or tabular format on-the-fly. Only the converted output file is kept.
+```bash
+# Convert Turtle to N-Triples
+databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=az.ttl.bz2 --format ntriples
+
+# Convert N-Quads to TriG (within quad equivalence class)
+databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01 --format trig
+
+# Convert RDF to CSV (cross-class, produces companion .meta.json)
+databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=az.ttl.bz2 --format csv
+
+# Combine format conversion and compression
+databusclient download https://databus.dbpedia.org/dbpedia/mappings/mappingbased-literals/2022.12.01/mappingbased-literals_lang=az.ttl.bz2 --format ntriples --compression gz
 ```
 
 <a id="cli-deploy"></a>