litedatum · litedatum · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,16 +8,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
-- None
+- feat(cli): refactor `check` and `schema` command interfaces from a positional `<source>` argument to `--conn` and `--table` options
+- feat(cli): add comprehensive test coverage for new CLI interface functionality
+- feat(cli): support explicit table name specification independent of database URL
 
 ### Changed
-- None
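+- **BREAKING CHANGE**: CLI interface changed from `vlite-cli check <source>` to `vlite-cli check --conn <connection> --table <table_name>`; the `schema` command changes the same way (`vlite-cli schema --conn <connection> --table <table_name> --rules <rules_file>`)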
+- refactor(cli): update SourceParser to accept optional table_name parameter
+- refactor(cli): modify check command to pass table_name to SourceParser.parse_source()
+- refactor(tests): update all existing CLI tests to use new interface format
+- refactor(tests): add new test cases specifically for table name parameter validation
 
 ### Fixed
-- None
+- fix(cli): resolve issue where `--table` parameter was not correctly passed to backend
+- fix(cli): ensure table name from `--table` option takes precedence over table name in database URL
+- fix(tests): update regression tests to use new CLI interface format
+- fix(tests): resolve test failures caused by interface changes
 
 ### Removed
-- None
+- **BREAKING CHANGE**: remove backward compatibility for old positional argument interface
+- remove(cli): eliminate support for the `<source>` positional argument in the `check` and `schema` commands
 
 ## [0.4.0] - 2025-01-27
 

diff --git a/cli/commands/check.py b/cli/commands/check.py
@@ -38,7 +38,13 @@
 
 
 @click.command("check")
-@click.argument("source", required=True)
+@click.option(
+    "--conn",
+    "connection_string",
+    required=True,
+    help="Database connection string or file path",
+)
+@click.option("--table", "table_name", required=True, help="Table name to validate")
 @click.option(
     "--rule",
     "rules",
@@ -59,7 +65,8 @@
     help="Show detailed information and failure samples",
 )
 def check_command(
-    source: str,
+    connection_string: str,
+    table_name: str,
     rules: Tuple[str, ...],
     rules_file: Optional[str],
     quiet: bool,
@@ -68,18 +75,22 @@ def check_command(
     """
     Check data quality for the given source.
 
+    NEW FORMAT:
+        vlite-cli check --conn <connection> --table <table_name> [options]
+
     SOURCE can be:
     - File path: users.csv, data.xlsx, records.json
-    - Database URL: mysql://user:pass@host/db.table
+    - Database URL: mysql://user:pass@host/db
     - SQLite file: sqlite:///path/to/file.db
 
     Examples:
-        vlite-cli check users.csv --rule "not_null(id)"
-        vlite-cli check mysql://user:pass@host/db.users --rules validation.json
+        vlite-cli check --conn users.csv --table users --rule "not_null(id)"
+        vlite-cli check --conn mysql://user:pass@host/db \
+            --table users --rules validation.json
     """
     # Record start time
     start_time = now()
-    logger.info(f"Starting data quality check for: {source}")
+    logger.info(f"Starting data quality check for: {connection_string}")
 
     # Create exception handler
     exception_handler = CliExceptionHandler(verbose=verbose)
@@ -111,23 +122,23 @@ def check_command(
                 )
 
             # Parse source
-            safe_echo(f"🔍 Analyzing source: {source}")
+            safe_echo(f"🔍 Analyzing source: {connection_string}")
 
             # Proactively verify that a provided file is not empty – this avoids
             # kicking off heavy validation logic only to discover the file is
             # useless.  The modern test-suite expects a graceful early-exit with a
             # clear error message in such a scenario.
-            potential_path = Path(source)
+            potential_path = Path(connection_string)
             if potential_path.exists() and potential_path.is_file():
                 if potential_path.stat().st_size == 0:
                     raise click.ClickException(
-                        f"Error: Source file '{source}' is empty "
+                        f"Error: Source file '{connection_string}' is empty "
                         "– nothing to validate."
                     )
 
             # Parse source config - this may raise Schema creation error
             # (OperationError)
-            source_config = source_parser.parse_source(source)
+            source_config = source_parser.parse_source(connection_string, table_name)
 
             # Parse rules - this may raise Schema creation error
             # (RuleExecutionError)
@@ -205,7 +216,7 @@ def check_command(
             output_formatter.display_results(
                 results=results_dicts,
                 rules=rule_configs,  # Pass as objects, not dicts
-                source=source,
+                source=connection_string,
                 execution_time=execution_time,
                 total_rules=len(rule_configs),
             )
@@ -248,7 +259,7 @@ def check_command(
             output_formatter.display_results(
                 results=results_dicts,
                 rules=rule_configs,  # Pass as objects, not dicts
-                source=source,
+                source=connection_string,
                 execution_time=execution_time,
                 total_rules=len(rule_configs),
             )

diff --git a/cli/commands/schema.py b/cli/commands/schema.py
@@ -315,11 +315,10 @@ def _build_prioritized_atomic_status(
     # Build per-column guard from SCHEMA details
     column_guard: Dict[str, str] = {}  # column -> NONE|FIELD_MISSING|TYPE_MISMATCH
     if schema_result:
-        details = (
-            schema_result.get("execution_plan", {})
-            .get("schema_details", {})
-            .get("field_results", [])
-        )
+        # Safely access nested dictionaries, checking for None at each level.
+        execution_plan = schema_result.get("execution_plan") or {}
+        schema_details = execution_plan.get("schema_details") or {}
+        details = schema_details.get("field_results") or []
         for item in details:
             col = str(item.get("column"))
             code = str(item.get("failure_code", "NONE"))
@@ -417,15 +416,22 @@ def _create_validator(
             core_config=core_config,
             cli_config=cli_config,
         )
-    except TypeError:
-        return DataValidator()  # type: ignore[call-arg]
+    except Exception as e:
+        logger.error(f"Failed to create DataValidator: {str(e)}")
+        raise click.UsageError(f"Failed to create validator: {str(e)}")
 
 
 def _run_validation(validator: Any) -> Tuple[List[Any], float]:
     import asyncio
 
     start = _now()
-    results = asyncio.run(validator.validate())
+    logger.debug("Starting validation")
+    try:
+        results = asyncio.run(validator.validate())
+        logger.debug(f"Validation returned {len(results)} results")
+    except Exception as e:
+        logger.error(f"Validation failed: {str(e)}")
+        raise
     exec_seconds = (_now() - start).total_seconds()
     return results, exec_seconds
 
@@ -440,6 +446,8 @@ def _extract_schema_result_dict(
         if not schema_rule:
             return None
         for r in results:
+            if r is None:
+                continue
             rid = ""
             if hasattr(r, "rule_id"):
                 try:
@@ -618,11 +626,11 @@ def _ensure_check(entry: Dict[str, Any], name: str) -> Dict[str, Any]:
     if schema_result_dict:
         try:
             extras = (
-                (schema_result_dict.get("execution_plan") or {}).get(
-                    "schema_details", {}
-                )
-                or {}
-            ).get("extras", [])
+                (schema_result_dict or {})
+                .get("execution_plan", {})
+                .get("schema_details", {})
+                .get("extras", [])
+            )
             if isinstance(extras, list):
                 schema_extras = [str(x) for x in extras]
         except Exception:
@@ -720,11 +728,9 @@ def _calc_failed(res: Dict[str, Any]) -> int:
 
     column_guard: Dict[str, str] = {}
     if schema_result_dict:
-        details = (
-            schema_result_dict.get("execution_plan", {})
-            .get("schema_details", {})
-            .get("field_results", [])
-        )
+        execution_plan = schema_result_dict.get("execution_plan") or {}
+        schema_details = execution_plan.get("schema_details") or {}
+        details = schema_details.get("field_results") or []
         for item in details:
             col = str(item.get("column"))
             column_guard[col] = str(item.get("failure_code", "NONE"))
@@ -832,7 +838,13 @@ def _calc_failed(res: Dict[str, Any]) -> int:
 
 
 @click.command("schema")
-@click.argument("source", required=True)
+@click.option(
+    "--conn",
+    "connection_string",
+    required=True,
+    help="Database connection string or file path",
+)
+@click.option("--table", "table_name", required=True, help="Table name to validate")
 @click.option(
     "--rules",
     "rules_file",
@@ -862,7 +874,8 @@ def _calc_failed(res: Dict[str, Any]) -> int:
 )
 @click.option("--verbose", is_flag=True, default=False, help="Enable verbose output")
 def schema_command(
-    source: str,
+    connection_string: str,
+    table_name: str,
     rules_file: str,
     output: str,
     fail_on_error: bool,
@@ -871,18 +884,30 @@ def schema_command(
 ) -> None:
     """Schema validation command with minimal rules file validation.
 
-    Decomposition and execution are added in subsequent tasks.
+    NEW FORMAT:
+        vlite-cli schema --conn <connection> --table <table_name> \
+            --rules <rules_file> [options]
+
+    SOURCE can be:
+    - File path: users.csv, data.xlsx, records.json
+    - Database URL: mysql://user:pass@host/db
+    - SQLite file: sqlite:///path/to/file.db
+
+    Examples:
+        vlite-cli schema --conn users.csv --table users --rules schema.json
+        vlite-cli schema --conn mysql://user:pass@host/db --table users \
+            --rules schema.json
     """
 
     from cli.core.config import get_cli_config
     from core.config import get_core_config
 
     # start_time = now()
     try:
-        _maybe_echo_analyzing(source, output)
-        _guard_empty_source_file(source)
+        _maybe_echo_analyzing(connection_string, output)
+        _guard_empty_source_file(connection_string)
 
-        source_config = SourceParser().parse_source(source)
+        source_config = SourceParser().parse_source(connection_string)
 
         rules_payload = _read_rules_payload(rules_file)
 
@@ -892,10 +917,28 @@ def schema_command(
         # Decompose into atomic rules per design
         atomic_rules = _decompose_to_atomic_rules(rules_payload)
 
-        # Fast-path: no rules → emit minimal payload and exit cleanly
+        # FIX: Manually populate the target table and database from CLI args
+        # The source_config object is a class instance, not a dict.
+        # Use attribute access.
+        source_db = source_config.db_name
+        if not source_db:
+            source_db = "unknown"
+
+        for rule in atomic_rules:
+            if rule.target and rule.target.entities:
+                rule.target.entities[0].database = source_db
+                rule.target.entities[0].table = table_name
+
+        # Superseded dict-style lookup (source_config is not a dict):
+        # source_db = source_config.get('database')
+        # for rule in atomic_rules:
+        #     if rule.target and rule.target.entities:
+        #         rule.target.entities[0].database = source_db
+        #         rule.target.entities[0].table = table_name
+        # Fast-path: no rules -> emit minimal payload and exit cleanly
         if len(atomic_rules) == 0:
             _early_exit_when_no_rules(
-                source=source,
+                source=connection_string,
                 rules_file=rules_file,
                 output=output,
                 fail_on_error=fail_on_error,
@@ -923,7 +966,7 @@ def schema_command(
         # Apply skip map to JSON output only; table mode stays concise by design
         if output.lower() == "json":
             _emit_json_output(
-                source=source,
+                source=connection_string,
                 rules_file=rules_file,
                 atomic_rules=atomic_rules,
                 results=results,
@@ -933,7 +976,7 @@ def schema_command(
             )
         else:
             _emit_table_output(
-                source=source,
+                source=connection_string,
                 atomic_rules=atomic_rules,
                 results=results,
                 skip_map=skip_map,

diff --git a/cli/core/source_parser.py b/cli/core/source_parser.py
@@ -52,12 +52,15 @@ def __init__(self) -> None:
             ".jsonl": ConnectionType.JSON,
         }
 
-    def parse_source(self, source: str) -> ConnectionSchema:
+    def parse_source(
+        self, source: str, table_name: Optional[str] = None
+    ) -> ConnectionSchema:
         """
         Parse source string into ConnectionSchema.
 
         Args:
             source: Source string (file path or database URL)
+            table_name: Optional table name (overrides table from URL if provided)
 
         Returns:
             ConnectionSchema: Parsed connection configuration
@@ -75,7 +78,7 @@ def parse_source(self, source: str) -> ConnectionSchema:
                 raise ValidationError("Unrecognized source format: Empty source")
 
             if self._is_database_url(source):
-                return self._parse_database_url(source)
+                return self._parse_database_url(source, table_name)
             elif source.startswith("file://"):
                 # Handle file:// protocol
                 file_path = source[7:]  # Remove file:// prefix
@@ -118,14 +121,20 @@ def _is_file_path(self, source: str) -> bool:
 
         return False
 
-    def _parse_database_url(self, url: str) -> ConnectionSchema:
+    def _parse_database_url(
+        self, url: str, table_name: Optional[str] = None
+    ) -> ConnectionSchema:
         """
         Parse database URL into connection configuration.
 
         Supports formats:
         - mysql://user:pass@host:port/database.table
         - postgres://user:pass@host:port/database.table
         - sqlite:///path/to/database.db.table
+
+        Args:
+            url: Database connection URL
+            table_name: Optional table name (overrides table from URL if provided)
         """
         self.logger.debug(f"Parsing database URL: {url}")
 
@@ -136,7 +145,10 @@ def _parse_database_url(self, url: str) -> ConnectionSchema:
         parsed = urllib.parse.urlparse(url)
 
         # Extract database and table from path
-        database, table = self._extract_db_table_from_path(parsed.path)
+        database, table_from_url = self._extract_db_table_from_path(parsed.path)
+
+        # Use provided table_name if available, otherwise use table from URL
+        table = table_name if table_name is not None else table_from_url
 
         # Handle SQLite special case
         if conn_type == ConnectionType.SQLITE: