From 5f1a338b984ed0ed399df9a1428b53d970a776d4 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Wed, 20 May 2026 15:34:58 +0530 Subject: [PATCH] SK-2838: add claude setup --- .claude/CLAUDE.md | 168 ++++++++++++++++++ .claude/commands/code-review.md | 136 ++++++++++++++ .claude/commands/code-security.md | 80 +++++++++ .claude/commands/code-smell.md | 138 ++++++++++++++ .claude/commands/sdk-sample.md | 75 ++++++++ .claude/commands/test.md | 77 ++++++++ .claude/settings.json | 42 +++++ .../skills/requesting-code-review/SKILL.md | 125 +++++++++++++ 8 files changed, 841 insertions(+) create mode 100644 .claude/CLAUDE.md create mode 100644 .claude/commands/code-review.md create mode 100644 .claude/commands/code-security.md create mode 100644 .claude/commands/code-smell.md create mode 100644 .claude/commands/sdk-sample.md create mode 100644 .claude/commands/test.md create mode 100644 .claude/settings.json create mode 100644 .claude/skills/requesting-code-review/SKILL.md diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 100644 index 0000000..52b8a72 --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1,168 @@ +# CLAUDE.md + +## Project Overview + +This is the Skyflow Python SDK (`skyflow-python`). It provides a Python interface to the Skyflow Data Privacy Vault API — vault operations (insert, get, update, delete, query, tokenize, detokenize, upload_file), service account authentication (bearer tokens, signed data tokens), connections, and detect (deidentify/reidentify text and files). + +**Current version:** 2.x + +## Critical Boundary — Generated Code + +**Never edit files under `skyflow/generated/`.** + +These are auto-generated by [Fern](https://buildwithfern.com) from the Skyflow API definition. Manual edits are overwritten on the next generation run. If you find a bug in generated code, report it — do not patch it directly. + +The `ruff.toml` and coverage omit lists already exclude `generated/` from all checks. + +## Project Structure + +``` +skyflow/ + client/ + skyflow.py # Skyflow facade + Builder (entry point) + vault/ + client/ + client.py # VaultClient — auth, token caching, generated client holder + controller/ + _vault.py # Vault: insert, get, update, delete, query, tokenize, detokenize, upload_file + _connections.py # Connection: invoke() + _detect.py # Detect: deidentify_text, reidentify_text, deidentify_file, get_detect_run + data/ # Request/Response objects: InsertRequest, GetResponse, FileUploadRequest, etc. + tokens/ # DetokenizeRequest/Response, TokenizeRequest/Response + connection/ # InvokeConnectionRequest/Response + detect/ # DeidentifyTextRequest, ReidentifyTextRequest, DeidentifyFileRequest, etc. + service_account/ + _utils.py # generate_bearer_token, generate_bearer_token_from_creds, generate_signed_data_tokens + auth/ # AuthClient — JWT exchange at tokenURI + utils/ + _skyflow_messages.py # All error/log/info strings (Error, ErrorLogs, Info, ErrorCodes, HttpStatus enums) + constants.py # CredentialField, JWT, SdkMetricsKey, OptionField, and top-level constants + _utils.py # handle_exception(), get_metrics(), is_expired() + validations/ + _validations.py # ALL request and config validation — validate_*() functions + enums/ # LogLevel, Env, RedactionType, TokenMode, ContentType, DetectEntities, etc. + logger.py # log_info(), log_error_log(), Logger + error/ + _skyflow_error.py # SkyflowError(message, http_code, request_id, grpc_code, http_status, details) + generated/ # ← FERN-GENERATED, DO NOT EDIT + rest/ # Raw HTTP client, API classes +tests/ # unittest tests mirroring skyflow/ structure +samples/ + vault_api/ # Vault operation samples + service_account/ # Bearer token / signed token samples + connection/ # Connection samples + detect_api/ # Detect samples +docs/ + migrate_to_v2.md # v1 → v2 migration guide + advanced_initialization.md + auth_credentials.md +``` + +## Naming Conventions + +- **Methods / variables / parameters:** `snake_case` — `vault_id`, `get_records`, `token_uri` +- **Classes / Exceptions / Enums:** `PascalCase` — `InsertRequest`, `SkyflowError`, `RedactionType` +- **Constants / module-level values:** `UPPER_SNAKE_CASE` — `SKY_META_DATA_HEADER`, `PROTOCOL` +- **Private methods / attributes:** `_snake_case` — `_validate_ctx`, `_cached_headers` +- **Acronyms are all-lowercase in identifiers:** `skyflow_id` (not `skyflow_ID`), `token_uri` (not `token_URI`), `api_key` (not `API_key`) +- **Response objects:** always use `snake_case` field names — `skyflow_id`, `inserted_fields`, `detokenized_fields` +- **Deprecated methods:** use `@deprecated` from `typing_extensions` for compile-time IDE strikethrough, plus `warnings.warn(msg, DeprecationWarning, stacklevel=2)` for runtime console output +- **Error messages:** always use `SkyflowMessages` enum constants — never hardcode strings in controllers or validators + +## Build and Test + +```bash +# Install dependencies +pip install -r requirements.txt +pip install ".[dev]" # includes ruff and codespell + +# Lint +ruff check . --output-format=github + +# Spell check +codespell + +# Run all tests with coverage +python -m coverage run --source=skyflow \ + --omit=skyflow/generated/*,skyflow/utils/validations/*,skyflow/vault/data/*,skyflow/vault/detect/*,skyflow/vault/tokens/*,skyflow/vault/connection/*,skyflow/error/*,skyflow/utils/enums/*,skyflow/vault/controller/_audit.py,skyflow/vault/controller/_bin_look_up.py \ + -m unittest discover + +# Coverage report +python -m coverage report --show-missing + +# Run a single test +python -m unittest tests.vault.controller.test__vault.TestVault.test_insert + +# Build package +python setup.py sdist bdist_wheel +``` + +**Commit message format:** All commits must include a Jira ticket ID, e.g. `SK-123: description`. CI enforces this on PRs. + +## Credentials Format + +The SDK accepts credentials as a dict with one of the following key patterns: + +```python +# Service account credentials string (JSON) +credentials = {'credentials_string': '{"clientID":"...","tokenURI":"...","keyID":"...","privateKey":"..."}'} + +# Service account credentials file path +credentials = {'path': 'credentials.json'} + +# API key +credentials = {'api_key': ''} + +# Static bearer token +credentials = {'token': ''} +``` + +The canonical credential JSON field names are `clientID`, `tokenURI`, `keyID`, `privateKey`. These are accessed via `CredentialField` constants in `skyflow/utils/constants.py`. + +## Key Design Patterns + +### Controller method flow +Every public controller method follows this exact sequence: +1. `log_info(SkyflowMessages.Info.XXX_TRIGGERED.value, logger)` +2. `validate_xxx_request(logger, request)` — raises `SkyflowError` on invalid input +3. `self.__initialize()` — refreshes bearer token if expired +4. Call generated API via `self.__vault_client.get_xxx_api()` +5. Parse response into typed response object +6. `log_info(SkyflowMessages.Info.XXX_SUCCESS.value, logger)` +7. `except Exception as e: handle_exception(e, logger)` + +### Validation pattern +All validators in `_validations.py` follow: +1. `log_error_log(SkyflowMessages.ErrorLogs.XXX.value, logger)` — log before raising +2. `raise SkyflowError(SkyflowMessages.Error.XXX.value, SkyflowMessages.ErrorCodes.INVALID_INPUT.value)` + +### Credential resolution order +`VaultClient` resolves credentials in this priority order: +1. Config-level credentials (passed to `add_vault_config()`) +2. Skyflow-level credentials (passed to `add_skyflow_credentials()`) +3. `SKYFLOW_CREDENTIALS` environment variable + +## Known Pre-existing Coverage Exclusions + +These modules are excluded from coverage measurement — omissions here are not regressions: + +| Path | Reason | +|---|---| +| `skyflow/generated/*` | Fern-generated REST client | +| `skyflow/utils/validations/*` | Validation-only, tested indirectly via controller tests | +| `skyflow/vault/data/*` | Plain dataclasses, no logic | +| `skyflow/vault/detect/*` | Detect request/response dataclasses | +| `skyflow/vault/tokens/*` | Token request/response dataclasses | +| `skyflow/vault/connection/*` | Connection request/response dataclasses | +| `skyflow/error/*` | Error class, minimal logic | +| `skyflow/utils/enums/*` | Enum definitions only | +| `skyflow/vault/controller/_audit.py` | Audit controller, not yet in test suite | +| `skyflow/vault/controller/_bin_look_up.py` | BIN lookup controller, not yet in test suite | + +## Slash Commands + +- `/code-review` — full review: SDK patterns + naming + test coverage + code smells + security +- `/code-smell` — standalone structural smell analysis (long methods, dead code, misplaced validation) +- `/code-security` — standalone security audit (credentials, input validation, path traversal, HTTP security) +- `/sdk-sample ` — generate a runnable sample file for a vault or service account feature +- `/test [module.path]` — run quality pipeline (lint → spell check → tests → coverage report) diff --git a/.claude/commands/code-review.md b/.claude/commands/code-review.md new file mode 100644 index 0000000..1ee4722 --- /dev/null +++ b/.claude/commands/code-review.md @@ -0,0 +1,136 @@ +--- +name: code-review +description: Full code review — SDK patterns, naming, test coverage, code smells, and security. Reads code-smell.md and code-security.md inline. +paths: + - skyflow/**/*.py + - tests/**/*.py +--- + +You are a senior engineer performing a thorough code review on the Skyflow Python SDK. + +## Review Mode + +Use `$ARGUMENTS` to determine scope: +- `full review` — scan all files under `skyflow/` recursively (exclude `skyflow/generated/`) +- A file or directory path — review only that path +- Empty / default — review files changed on current branch vs `main`: + ```bash + git diff main...HEAD --name-only | grep '\.py$' | grep -v 'generated' + ``` + +**Skip entirely:** `skyflow/generated/` — Fern-generated REST client, read-only. + +--- + +## 1. Request / Response patterns + +- Request classes are plain data holders — all validation happens in `validate_*_request()` inside `skyflow/utils/validations/_validations.py`, not in `__init__`. Flag if validation logic is duplicated outside `_validations.py`. +- Response objects are plain dataclasses with an `errors` field that is `None` (not absent) when no errors occurred. +- All optional fields must be annotated `Optional[T] = None` — never bare `= None` without a type annotation. +- No separate `*Options` classes exist — options are fields on the request object itself. + +--- + +## 2. Error handling + +- All public controller methods must wrap API calls in `try/except Exception` that calls `handle_exception(e, logger)` or raises `SkyflowError` +- `SkyflowError` must be raised with an error code from `SkyflowMessages.ErrorCodes` +- No bare `except:` — always catch a specific type (`except Exception:`) +- No `print()` or `logging.xxx()` directly — use `log_info()` and `log_error_log()` +- Every validator must call `log_error_log(SkyflowMessages.ErrorLogs.xxx.value)` before raising `SkyflowError` + +--- + +## 3. Naming conventions + +| Identifier | Style | Example | +|---|---|---| +| Variable / parameter / method | `snake_case` | `vault_id`, `get_records` | +| Constant / module-level value | `UPPER_SNAKE_CASE` | `SKY_META_DATA_HEADER` | +| Class / Exception / Enum | `PascalCase` | `InsertRequest`, `SkyflowError` | +| Private method / attribute | `_snake_case` | `_validate_ctx` | +| Source file | `snake_case.py` | `_file_upload_request.py` | + +- Acronyms are all-lowercase in snake_case: `skyflow_id` not `skyflow_ID`, `token_uri` not `token_URI` +- Deprecated methods must use `@deprecated` from `typing_extensions` (compile-time IDE warning) plus a `warnings.warn(DeprecationWarning, stacklevel=2)` call at runtime + +--- + +## 4. Response field normalisation + +- All response objects must use `snake_case` field names (`skyflow_id`, not `skyflowId`) +- `errors` must be present on every response class, defaulting to `None` + +--- + +## 5. Test coverage + +- Every public method must have at least one positive and one negative test +- Tests must use `assertEqual` / `assertIsNone` / `assertRaises` — not just bare `assert` +- No mocking of the class under test +- Use `unittest.mock.patch` / `MagicMock` for external dependencies (HTTP, file I/O) + +--- + +## 6. Code quality + +- No magic strings for API field names — use `CredentialField`, `OptionField`, or `SkyflowMessages` constants +- No duplicate validation logic across request classes — belongs in `_validations.py` +- No `# noqa` without a comment explaining why +- `warnings.warn(DeprecationWarning, stacklevel=2)` must be used for deprecation — never `print()` to stderr + +--- + +## 7. Code smells + +Code smells are structural signals — report at **Smell** severity. + +### Method & class size +- **Long method** — any method over 50 lines. Candidate for decomposition. +- **Large parameter list** — more than 5 parameters. Consider a request object. + +### Responsibility violations +- **Business logic in Request/Response classes** — these are data holders. Flag any conditional logic beyond simple attribute assignment. +- **Validation outside `_validations.py`** — any `if x is None: raise SkyflowError(...)` outside `skyflow/utils/validations/` is misplaced. + +### Control flow +- **Deep nesting** — more than 3 levels of `if`/`for`/`try`. Extract inner blocks to named helpers or use early returns. +- **Long if-else chains** — more than 4 branches. Consider a dispatch dict. + +### Data +- **Magic numbers** — literal integers used in comparisons or sizes without a named constant. +- **Mutable default arguments** — `def f(x=[])` or `def f(x={})`. Replace with `None` and initialise in the body. + +### Dead code +- **Unused private methods** or **unused imports** — run `ruff check --select=F401`. +- **Commented-out code** — remove or add a `# TODO: [ticket]` reference. + +--- + +## Output Format + +Group findings by file: + +``` +### skyflow/path/to/file.py + +| Severity | Line | Finding | +|------------|------|------------------------------------------------------------| +| Critical | 42 | SkyflowError swallowed in except block | +| Bug | 87 | skyflow_id not set on response object | +| Quality | 103 | Magic string "records" — use OptionField constant | +| Smell | 210 | Method is 65 lines — candidate for decomposition | +``` + +**Severities:** +| Level | Meaning | +|---|---| +| **Critical** | Data loss, silent failure, security risk — must fix before merge | +| **Bug** | Wrong behaviour, incorrect output — must fix before merge | +| **Edge Case** | Unhandled input that will cause runtime failure — fix before merge | +| **Quality** | Maintainability issue, naming violation, missing pattern — fix before merge | +| **Smell** | Structural signal, technical debt — flag and track | + +End with: +1. A tech-debt summary table grouped by category (Error handling / Naming / Smells / Tests) +2. A verdict: `APPROVE` / `APPROVE WITH FIXES` / `REQUEST CHANGES` diff --git a/.claude/commands/code-security.md b/.claude/commands/code-security.md new file mode 100644 index 0000000..16aac5a --- /dev/null +++ b/.claude/commands/code-security.md @@ -0,0 +1,80 @@ +--- +name: code-security +description: Security audit — credential exposure, input validation, path traversal, HTTP security, token lifecycle, dependency CVEs. +paths: + - skyflow/service_account/**/*.py + - skyflow/vault/client/**/*.py + - skyflow/vault/controller/**/*.py + - skyflow/utils/**/*.py + - requirements.txt +--- + +You are a security engineer auditing the Skyflow Python SDK for vulnerabilities. + +## Audit Scope + +Use `$ARGUMENTS` to determine target files. If none provided, run: +```bash +git diff main...HEAD --name-only | grep '\.py$' | grep -v 'generated' +``` + +**Skip:** `skyflow/generated/` — observations only, no edits. + +## Security Checks + +### 1. Credential and token exposure (Critical) +- Bearer tokens, API keys, and private keys must never appear in log messages (`log_info`, `log_error_log`), `SkyflowError` message strings, or `__repr__` / `__str__` output +- `CredentialField` values (`private_key`, `client_id`, `token_uri`) must not be serialised to logs +- JWT claims must not be logged +- `except` blocks must not log `str(e)` or `repr(e)` when the exception may contain auth headers or credential fields + +### 2. Input validation (High) +- All user-supplied strings passed to `open()`, `os.path.exists()`, or `os.path.join()` must be validated for path traversal (`../`, `..\\`, null bytes `\x00`) +- Regex patterns from user input must be compiled inside `try/except re.error` to prevent `re.error` or ReDoS +- `base64.b64decode()` on external data must use `validate=True` and have a size check before decoding + +### 3. File handling (High) +- All `open()` calls must use a context manager (`with open(...) as f:`) — bare `open()` leaks handles on exception paths +- User-supplied directory paths must be validated with `os.path.isdir()` before use — never call `os.makedirs()` on arbitrary user input +- Output file paths must be constructed with `os.path.join(validated_base, sanitised_name)` — never string concatenation with unsanitised components +- Temporary files must use `tempfile.NamedTemporaryFile` or `tempfile.mkstemp()`, never `"/tmp/" + user_value` + +### 4. HTTP security (Medium) +- All API calls must use HTTPS — flag any hardcoded `http://` URL or URL assembled without a scheme check +- SSL verification must never be disabled (`verify=False` in `httpx` or `requests` calls) +- Auth headers (`Authorization`, `X-Skyflow-Authorization`) must not be logged at any level +- HTTP clients must be configured with connection and read timeouts — flag absent `timeout=` parameters + +### 5. Error information leakage (Medium) +- `SkyflowError` messages must not include raw server response bodies, internal file system paths, or Python tracebacks +- `handle_exception()` must strip sensitive server details before wrapping in `SkyflowError` +- `except` blocks must log only controlled message strings from `SkyflowMessages.ErrorLogs` — never `str(e)` from exceptions that may contain credentials + +### 6. Dependency vulnerabilities (Low) +- Run `pip-audit` against `requirements.txt` and report HIGH / CRITICAL CVEs: + ```bash + pip install pip-audit && pip-audit -r requirements.txt + ``` +- Flag unpinned dependencies on security-sensitive packages (`cryptography`, `PyJWT`, `httpx`, `requests`) — prefer `~=` or exact pins over open `>=` + +### 7. Authentication lifecycle (Medium) +- Bearer tokens fetched via `generate_bearer_token()` must be checked with `is_expired()` immediately before each API call +- `is_expired()` must decode without signature verification only for expiry checking — it must not bypass actual auth decisions +- JWT signing must use `RS256` — flag any path where the algorithm could be set to `HS256` with a user-supplied secret +- Service account credentials files must not be world-readable — check `os.stat(path).st_mode` for `0o644` + +## Output Format + +For each finding: + +``` +### skyflow/path/to/file.py : line N + +**Severity:** Critical / High / Medium / Low / Info +**Risk:** What an attacker or misconfiguration could cause +**Trigger:** Input or code path that triggers the vulnerability +**Fix:** Concrete remediation +**CWE:** CWE-NNN +``` + +End with a summary table and overall risk rating (Critical / High / Medium / Low). diff --git a/.claude/commands/code-smell.md b/.claude/commands/code-smell.md new file mode 100644 index 0000000..ed089d1 --- /dev/null +++ b/.claude/commands/code-smell.md @@ -0,0 +1,138 @@ +--- +name: code-smell +description: Structural smell analysis + spell check — long methods, dead code, misplaced validation, deep nesting, magic numbers. Does not check patterns or security. +paths: + - skyflow/**/*.py +--- + +You are a senior engineer performing a code smell analysis on the Skyflow Python SDK. + +## Scope + +Use `$ARGUMENTS` to determine scope: +- A file or directory path — analyse only that path +- Empty / default — analyse files changed on current branch vs `main`: + ```bash + git diff main...HEAD --name-only | grep '\.py$' | grep -v 'generated' + ``` + +**Skip entirely:** `skyflow/generated/` — Fern-generated REST client, read-only. + +--- + +## Spell check + +Before analysing smells, run codespell on the files in scope: + +```bash +codespell skyflow/ tests/ samples/ docs/ CLAUDE.md +``` + +Report any spelling violations at **Smell** severity in the per-file table. Add legitimate project-specific terms to `.codespell-ignore` rather than treating them as typos. + +--- + +## What Are Code Smells + +Code smells are structural signals — they do not necessarily mean the code is broken, but they indicate areas of technical debt, reduced readability, or future maintenance risk. All findings are reported at **Smell** severity and do not block merge unless they indicate a design violation. + +--- + +## Smell Catalogue + +### Method & Class Size + +**Long method** — any method over 50 lines. +Signal: the method is doing too much. Candidate for decomposition into named private helpers. + +**Large parameter list** — more than 5 parameters on a method. +Signal: consider a request dataclass or keyword-only arguments to group related parameters. + +--- + +### Responsibility Violations + +**Business logic in Request/Response classes** +Request and Response classes are data holders — they carry data, nothing more. Flag any conditional logic, field transformation, or computation beyond simple attribute assignment in `__init__`. + +**Validation outside `_validations.py`** +Any `if x is None: raise SkyflowError(...)` outside `skyflow/utils/validations/` is misplaced validation. All request validation must live in `validate_*()` functions in `_validations.py`. + +--- + +### Control Flow + +**Deep nesting** — more than 3 levels of `if` / `for` / `try` nesting. +Signal: extract inner blocks to named private methods or use early returns. + +**Long if-else chains** — more than 4 branches on the same condition. +Signal: consider a dispatch `dict` or separate handler methods. + +**Null checks scattered** +Multiple consecutive `if x is None` guards that could be replaced with an early return guard clause. + +--- + +### Data + +**Magic numbers** +Literal integers or sizes (e.g. `25`, `3600`, `100`) in comparisons or logic without a named constant. Use `UPPER_SNAKE_CASE` constants in `skyflow/utils/constants.py`. + +**Mutable default arguments** +`def f(self, items=[], config={})` — mutable defaults are shared across all calls. Replace with `None` and initialise in the body. + +**Temporary attribute** +A `self.xxx` attribute that is only set in certain code paths and `None` the rest of the time. Should be a local variable or method parameter instead. + +--- + +### Dead Code + +**Unused private methods** — private methods (`_xxx`) with no callers. + +**Unused imports** — run `ruff check --select=F401` to identify. + +**Unreachable code** — code after `return` / `raise` in the same branch. + +**Commented-out code** — blocks of commented code without explanation. Remove entirely or add a `# TODO: [ticket]` with context. + +--- + +### Comments + +**Explains what, not why** +A comment that restates what the code does (`# get the vault ID`) adds no value. Flag comments that explain *what* without explaining *why*. + +**Stale comment** +A comment that contradicts the current code — e.g. references a removed parameter, an old method name, or a behaviour that has changed. + +--- + +## Output Format + +Group findings by file: + +``` +### skyflow/path/to/file.py + +| Smell | Line | Detail | +|---------------------------|------|-------------------------------------------------------------| +| Long method | 42 | _process_insert_response() is 67 lines — decompose | +| Business logic in Response| 88 | __init__ renames skyflow_id — move to controller | +| Magic number | 103 | Literal 25 — extract to MAX_QUERY_RECORDS constant | +| Stale comment | 210 | References removed tokenized_data field | +| Dead code | 315 | Private method _build_headers() has no callers | +``` + +End with a **Smell Summary** table: + +``` +| Category | Count | Files affected | +|-----------------------|-------|-----------------------------| +| Long methods | 2 | _vault.py | +| Business logic in DTO | 1 | _query_response.py | +| Magic numbers | 3 | _validations.py | +| Dead code | 2 | _utils.py | +``` + +Close with a recommendation: **CLEAN** / **MINOR DEBT** / **SIGNIFICANT DEBT** and a one-sentence summary. diff --git a/.claude/commands/sdk-sample.md b/.claude/commands/sdk-sample.md new file mode 100644 index 0000000..61d46f5 --- /dev/null +++ b/.claude/commands/sdk-sample.md @@ -0,0 +1,75 @@ +--- +name: sdk-sample +description: Generate a Skyflow Python SDK sample file for a vault feature or service account operation. Verified runnable after creation. +paths: + - samples/**/*.py +--- + +Create a Skyflow Python SDK sample file demonstrating: $ARGUMENTS + +## File placement + +| Feature type | Directory | +|---|---| +| Vault ops (insert/get/update/delete/query/tokenize/upload_file) | `samples/vault_api/` | +| Service account auth (bearer token, signed data tokens) | `samples/service_account/` | +| Connection | `samples/connection/` | +| Detect (deidentify/reidentify text and files) | `samples/detect_api/` | + +File name: `.py` (snake_case) + +## Structure (follow this order) + +1. Module-level docstring describing what the sample demonstrates +2. Imports — only from `skyflow.*`; never from `skyflow.generated.*` +3. Credentials dict — choose based on feature: + - **Vault ops / Detect:** `credentials = {'credentials_string': ''}` or `{'api_key': ''}` + - **Service account:** pass the path to a credentials JSON file or a credentials string +4. Vault / connection config dict with `vault_id`, `cluster_id`, `env`, `credentials` +5. Build the Skyflow client: + ```python + skyflow_client = ( + Skyflow.builder() + .add_vault_config(vault_config) + .set_log_level(LogLevel.INFO) + .build() + ) + ``` +6. Request object — plain constructor with keyword arguments (no builder, no separate Options class): + ```python + request = InsertRequest( + table='table_name', + values=[{'field': 'value'}], + return_tokens=True, + ) + ``` +7. Call the vault/detect/connection method inside `try/except SkyflowError`: + ```python + response = skyflow_client.vault('').insert(request) + print(response) + ``` +8. Catch `SkyflowError` and print structured error details: + ```python + except SkyflowError as e: + print({'code': e.http_code, 'message': e.message, 'details': e.details}) + ``` + +## Rules + +- Vault IDs / cluster IDs use placeholders: `''`, `''` +- Credential values use placeholders: `''`, `''` +- Credentials file path: `'credentials.json'` (relative — no absolute paths) +- Always use single quotes for string literals (matches project style) +- No separate `*Options` classes — they don't exist in this SDK; all options are fields on the request object +- Always catch `SkyflowError` and print `e.http_code`, `e.message`, `e.details` +- Keep under 80 lines +- Do not import from `skyflow.generated.*` + +## After creating the file + +Verify the file has no syntax errors and that all imports resolve: +```bash +python -m py_compile samples//.py && echo "OK" +``` + +Report the file path and any errors. diff --git a/.claude/commands/test.md b/.claude/commands/test.md new file mode 100644 index 0000000..21ba3b3 --- /dev/null +++ b/.claude/commands/test.md @@ -0,0 +1,77 @@ +--- +name: test +description: Quality pipeline — lint, spell check, tests, coverage analysis. Pass a test class or module path to target a single test. +paths: + - skyflow/**/*.py + - tests/**/*.py +--- + +Run the Skyflow Python SDK quality pipeline. + +Use `$ARGUMENTS` to target a specific test module (e.g. `tests.client.test_skyflow`). If empty, run the full suite. + +## Known Pre-existing Exclusions + +The coverage run omits generated and boilerplate-only modules. These are not regressions: +- `skyflow/generated/*` — Fern-generated REST client +- `skyflow/utils/validations/*`, `skyflow/vault/data/*`, `skyflow/vault/detect/*` +- `skyflow/vault/tokens/*`, `skyflow/vault/connection/*`, `skyflow/error/*` +- `skyflow/utils/enums/*` +- `skyflow/vault/controller/_audit.py`, `skyflow/vault/controller/_bin_look_up.py` + +## Pipeline + +### Step 1 — Lint +```bash +ruff check . --output-format=github +``` +Expected: no output (clean). Report any errors — these block CI. + +### Step 2 — Spell check +```bash +codespell +``` +Report any unknown words. Add legitimate project terms to the ignore list rather than fixing them as typos. + +### Step 3 — Tests +If `$ARGUMENTS` is set: +```bash +python -m coverage run --source=skyflow \ + --omit=skyflow/generated/*,skyflow/utils/validations/*,skyflow/vault/data/*,skyflow/vault/detect/*,skyflow/vault/tokens/*,skyflow/vault/connection/*,skyflow/error/*,skyflow/utils/enums/*,skyflow/vault/controller/_audit.py,skyflow/vault/controller/_bin_look_up.py \ + -m unittest $ARGUMENTS +``` +Otherwise: +```bash +python -m coverage run --source=skyflow \ + --omit=skyflow/generated/*,skyflow/utils/validations/*,skyflow/vault/data/*,skyflow/vault/detect/*,skyflow/vault/tokens/*,skyflow/vault/connection/*,skyflow/error/*,skyflow/utils/enums/*,skyflow/vault/controller/_audit.py,skyflow/vault/controller/_bin_look_up.py \ + -m unittest discover +``` +Report: tests run, failures, errors. + +### Step 4 — Coverage report +```bash +python -m coverage report --show-missing +``` +Flag any public interface module (`skyflow/client/`, `skyflow/vault/controller/`, `skyflow/service_account/`) below 80% coverage. + +### Step 5 — Coverage gaps +For classes below complete coverage, identify missing scenarios: +- `None` / empty inputs +- Invalid types / wrong enum values +- Error paths (API rejection, validation failure) +- Concurrent / reuse scenarios + +Write concrete `unittest.TestCase` method stubs (not full implementations) for each gap. + +### Step 6 — Report + +``` +| Step | Status | Notes | +|---------------|-----------|------------------------------| +| Lint | ✅ / ❌ | ... | +| Spell check | ✅ / ❌ | ... | +| Tests | ✅ / ❌ | N passed, M failed | +| Coverage | ✅ / ❌ | list modules below threshold | +``` + +Conclude with **READY TO MERGE** or **NEEDS FIXES** and a prioritised fix list. diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000..a7db077 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,42 @@ +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "python3 -c \"\nimport sys, json, subprocess\nd = json.load(sys.stdin)\nf = d.get('tool_input', {}).get('file_path', d.get('file_path', ''))\nif f and f.endswith('.py') and 'generated' not in f:\n if 'samples' in f:\n subprocess.run(['.venv/bin/python', '-m', 'ruff', 'check', '--fix', '--force-exclude', f], capture_output=True)\n subprocess.run(['.venv/bin/python', '-m', 'ruff', 'format', '--force-exclude', f], capture_output=True)\n\"" + } + ] + } + ], + "PreToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "python3 -c \"import sys,json; d=json.load(sys.stdin); p=d.get('tool_input',{}).get('file_path',d.get('file_path','')); banned='skyflow/generated'; (sys.stderr.write('BLOCKED: Fern-generated code — do not edit manually\\n'), sys.exit(2)) if banned in p else sys.exit(0)\"" + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "notify-send 'Claude Code' 'Claude finished' 2>/dev/null || true" + } + ] + } + ] + }, + "permissions": { + "deny": [ + "Edit(skyflow/generated/**)", + "Write(skyflow/generated/**)" + ] + } +} diff --git a/.claude/skills/requesting-code-review/SKILL.md b/.claude/skills/requesting-code-review/SKILL.md new file mode 100644 index 0000000..c5da281 --- /dev/null +++ b/.claude/skills/requesting-code-review/SKILL.md @@ -0,0 +1,125 @@ +--- +name: requesting-code-review +description: Use when completing tasks, implementing major features, or before merging to verify work meets requirements +paths: + - skyflow/**/*.py + - tests/**/*.py +--- + +# Requesting Code Review + +Dispatch a code reviewer subagent to catch issues before they cascade. The reviewer gets precisely crafted context for evaluation — never your session's history. This keeps the reviewer focused on the work product, not your thought process, and preserves your own context for continued work. + +**Core principle:** Review early, review often. + +## When to Request Review + +**Mandatory:** +- After each task in subagent-driven development +- After completing a major feature +- Before merge to main + +**Optional but valuable:** +- When stuck (fresh perspective) +- Before refactoring (baseline check) +- After fixing a complex bug + +## How to Request + +**1. Get git SHAs:** +```bash +BASE_SHA=$(git rev-parse HEAD~1) # or origin/main +HEAD_SHA=$(git rev-parse HEAD) +``` + +**2. Choose the right review type for this project:** + +| Change type | Use | +|---|---| +| SDK logic, patterns, naming, tests | `/code-review` — runs SDK checks + smell + security | +| Structural debt only | `/code-smell` — standalone smell analysis | +| Auth, credentials, tokens, HTTP | `/code-security` — standalone security audit | +| Full review via subagent | Dispatch with template below | + +For a full feature branch vs main: +```bash +BASE_SHA=$(git merge-base main HEAD) +HEAD_SHA=$(git rev-parse HEAD) +``` + +For security-sensitive changes (auth, credentials, bearer tokens, HTTP headers) — dispatch both quality and security: +```bash +/code-review skyflow/service_account/ +/code-security skyflow/service_account/ +``` + +**3. Dispatch code reviewer subagent:** + +Use Agent tool with `general-purpose` type and fill in the template placeholders: + +**Placeholders:** +- `{DESCRIPTION}` — Brief summary of what you built +- `{PLAN_OR_REQUIREMENTS}` — What it should do +- `{BASE_SHA}` — Starting commit +- `{HEAD_SHA}` — Ending commit + +**4. Act on feedback:** +- Fix Critical issues immediately +- Fix Bug / Edge Case issues before proceeding +- Note Quality / Smell issues for later +- Push back if the reviewer is wrong (with reasoning) + +## Example + +``` +[Just completed: Add FileUploadRequest deprecation shim] + +You: Let me request code review before proceeding. + +BASE_SHA=$(git merge-base main HEAD) +HEAD_SHA=$(git rev-parse HEAD) + +[Dispatch code reviewer subagent] + DESCRIPTION: Added *args shim to FileUploadRequest for backward-compatible positional arg order + PLAN_OR_REQUIREMENTS: Old order (table, skyflow_id, column_name) must still work with DeprecationWarning + BASE_SHA: a7981ec + HEAD_SHA: 3df7661 + +[Subagent returns]: + Strengths: Clean shim, correct stacklevel=2, good test coverage + Issues: + Bug: Magic value 2 in len(args) >= 2 — PLR2004 ruff violation + Quality: Warning message doesn't mention what to use instead + Assessment: Approve with fixes + +You: [Fix ruff violation and update warning message] +[Continue to next task] +``` + +## Integration with Workflows + +**Subagent-Driven Development:** +- Review after EACH task +- Catch issues before they compound +- Fix before moving to the next task + +**Executing Plans:** +- Review after each task or at natural checkpoints +- Get feedback, apply, continue + +**Ad-Hoc Development:** +- Review before merge +- Review when stuck + +## Red Flags + +**Never:** +- Skip review because "it's simple" +- Ignore Critical issues +- Proceed with unfixed Bug / Edge Case issues +- Argue with valid technical feedback + +**If reviewer is wrong:** +- Push back with technical reasoning +- Show code / tests that prove it works +- Request clarification