From efce749dac04d19443242892235501940c7951ba Mon Sep 17 00:00:00 2001
From: ying2212 <gohguanying@gmail.com>
Date: Tue, 12 May 2026 14:15:19 -0400
Subject: [PATCH 1/3] write SKILL to generate ingest source script

---
 .claude/skills/ingest-source/SKILL.md | 177 ++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 .claude/skills/ingest-source/SKILL.md

diff --git a/.claude/skills/ingest-source/SKILL.md b/.claude/skills/ingest-source/SKILL.md
new file mode 100644
index 0000000..a2568eb
--- /dev/null
+++ b/.claude/skills/ingest-source/SKILL.md
@@ -0,0 +1,177 @@
+---
+name: ingest-source
+description: Generate a Python ingest script that ingests sources (astronomical objects) into an AstroDB database from a parsed data table. Use this skill after match-schema has verified the source name, RA, Dec, and reference columns. Trigger when user says: "ingest sources", "ingest objects", "add new sources to database", or "ingest my data table".
+compatibility: python, astropy, astrodb_utils, astroquery
+---
+
+# Ingest Sources Skill
+Generate a Python script that ingests rows from a data table into the `Sources` table of an AstroDB SQLite database using `astrodb_utils.sources.ingest_source`.
+
+## Prerequisites
+1. **A database file**: JSON data files with a `database.toml` settings file, following the astrodb-template-db structure. If the user doesn't have a database yet, run the `create-astrodb` skill first.
+2. **Installed packages**: `astrodb_utils`, `astropy`, `astroquery` 
+3. **A data table**: CSV, ECSV, FITS, or other astropy-readable format with at minimum: a source name column and a discovery reference column
+4. **Publications table populated**: every reference value must already exist in the `Publications` table. If not, tell user that the reference should be ingested first.
+5. **Internet access (recommended)**: used to query SIMBAD for coordinates when RA/Dec are missing
+
+## Required Inputs
+1. Path to the data table file (CSV, ECSV, FITS, etc.)
+2. Path to `database.toml` (the project config file generated by `create-astrodb`)
+
+## Instrutions
+
+### Step 1: Parse the data table
+Run the `parse-data-table` skill on the provided data file, then read the sidecar it produces:
+```python
+import json, os
+ 
+sidecar = "tmp/astrodb-parse-result.json"
+meta = json.load(open(sidecar))
+file_path = meta["file_path"]   # path to the data table
+reader    = meta["reader"]      # "astropy" or "pandas"
+fmt       = meta["format"]      # astropy format hint, or None
+n_rows    = meta["n_rows"]      # number of rows
+```
+
+Load the table using the same reader that parse-data-table already verified:
+ 
+```python
+from astropy.table import Table
+import pandas as pd
+ 
+if reader == "astropy":
+    kwargs = {"format": fmt} if fmt else {}
+    data = Table.read(file_path, **kwargs)
+else:
+    data = getattr(pd, meta["pandas_method"])(file_path)
+```
+
+
+### Step 2: Confirm column mappings
+Show the user the column names from the parsed result and ask them to identify:
+- Which column is the **source name** (required)
+- Which column is **RA** in decimal degrees (optional)
+- Which column is **Dec** in decimal degrees (optional)
+- Which column is the **discovery reference** (required - must exist in Publications table)
+- Any optional columns: epoch, equinox, comment, other_reference
+
+Example prompt to user:
+> The table has these columns: `source, ra_deg, dec_deg, reference, epoch`
+> Which column is the source name? Which is the discovery reference?
+
+
+### Step 3: Generate ingest_sources.py
+Populate the script config with confirmed mappings and paths from the sidecar.
+Write the script to `tmp/ingest_sources.py`.
+
+### Step 4: Run the script
+Execute `tmp/ingest_sources.py` with `SAVE_DB = False` (dry run) and report:
+- How many sources were ingested successfully
+- Any rows skipped with warnings
+- That the database is still in preview mode
+
+### Step 5: Prompt to save
+After a successful dry run (no errors or only expected warnings), ask the user:
+ 
+> Ingestion preview complete: X sources processed, Y added, Z skipped.
+> Would you like to save these changes to the database? (Sets SAVE_DB = True and re-runs)
+ 
+Only re-run with `SAVE_DB = True` if the user explicitly confirms. Never save automatically.
+
+## Generated Script
+Write `tmp/ingest_sources.py` with the confirmed values filled in:
+ 
+```python
+import logging
+from astropy.table import Table
+import pandas as pd
+from astrodb_utils import build_db_from_json
+from astrodb_utils.sources import ingest_source
+ 
+# --- Logging ---
+astrodb_utils_logger = logging.getLogger("astrodb_utils")
+astrodb_utils_logger.setLevel(logging.INFO)
+logger = logging.getLogger("astrodb_utils.ingest_sources")
+logger.setLevel(logging.INFO)
+ 
+# --- Configuration ---
+SAVE_DB = False  # set True only after dry run confirms no errors
+ 
+# Load database — matches the structure created by the create-astrodb skill.
+# SCHEMA_PATH must point to the cloned schema repo (base_path in build_db_from_json).
+# If you don't have it yet, run:
+#   git clone https://github.com/astrodbtoolkit/astrodb-template-db.git tests/astrodb-template-db
+SCHEMA_PATH  = "tests/astrodb-template-db"        # cloned schema repo
+DB_NAME      = "tests/astrodb-template-tests"     # output .sqlite path (no extension)
+SETTINGS_FILE = "database.toml"                   # matches data_path and felis_path
+ 
+db = build_db_from_json(
+    settings_file=SETTINGS_FILE,
+    base_path=SCHEMA_PATH,
+    db_name=DB_NAME,
+)
+ 
+# --- Data table ---
+# Filled from parse-data-table sidecar (tmp/astrodb-parse-result.json)
+TABLE_PATH = "path/to/data_table.csv"  # fill in
+data = Table.read(TABLE_PATH)
+logger.info(f"Loaded {len(data)} rows from {TABLE_PATH}")
+ 
+# --- Column mapping — filled from confirmed mappings in Step 2 ---
+SOURCE_COL    = "source"      # required
+RA_COL        = "ra_deg"      # set to None if not in table (SIMBAD fallback)
+DEC_COL       = "dec_deg"     # set to None if not in table (SIMBAD fallback)
+REFERENCE_COL = "reference"   # required — must exist in Publications table
+ 
+# Optional columns — set to None if not present
+EPOCH_COL     = None
+EQUINOX_COL   = None
+COMMENT_COL   = None
+OTHER_REF_COL = None
+ 
+# --- Ingest sources ---
+# raise_error=True: stop on first error (good for dry runs and development)
+# raise_error=False: skip bad rows and continue (good for bulk ingestion)
+sources_added = 0
+for row in data:
+    try:
+        ingest_source(
+            db,
+            source=row[SOURCE_COL],
+            reference=row[REFERENCE_COL],
+            ra=row[RA_COL] if RA_COL else None,
+            dec=row[DEC_COL] if DEC_COL else None,
+            epoch=str(row[EPOCH_COL]) if EPOCH_COL else None,
+            equinox=str(row[EQUINOX_COL]) if EQUINOX_COL else None,
+            other_reference=str(row[OTHER_REF_COL]) if OTHER_REF_COL else None,
+            comment=str(row[COMMENT_COL]) if COMMENT_COL else None,
+            raise_error=True,
+        )
+        sources_added += 1
+        logger.info(f"Ingested: {row[SOURCE_COL]}")
+    except Exception as e:
+        logger.warning(f"Skipping {row[SOURCE_COL]}: {e}")
+        continue
+ 
+logger.info(f"Total sources ingested: {sources_added} / {len(data)}")
+ 
+# --- Save database ---
+# Save path matches data_path in database.toml, consistent with create-astrodb skill.
+if SAVE_DB:
+    db.save_database(directory="data/")
+    logger.info("Database saved to data/")
+```
+
+## Key Behaviors
+1. **Missing RA/Dec**: if `RA_COL` or `DEC_COL` is `None`, `ingest_source` queries SIMBAD automatically. If SIMBAD has no match, that row is skipped with a warning.
+2. **Duplicate sources**: if a source already exists, `ingest_source` adds the new name as an alternate in the `Names` table. With `raise_error=False`, duplicates are skipped with a warning.
+3. **Missing reference**: `reference` must already be in `Publications` or ingestion fails. Remind the user to run `ingest_publication` first.
+4. **Unicode dashes**: handled automatically by `ingest_source` (en dash, em dash, minus sign, figure dash → `-`).
+5. **Column name defaults**: `ra_col_name="ra_deg"`, `dec_col_name="dec_deg"`, `epoch_col_name="epoch_year"` — these are the database column names, not the input table column names.
+
+## Output
+Report:
+1. Number of sources successfully ingested vs total rows
+2. Any skipped rows with reasons from the WARNING logs
+3. Confirm whether database was saved or is still in preview mode (`SAVE_DB = False`)
+

From bdfdae8ac73c7ba0ffc00d09e421d6b57d29ae22 Mon Sep 17 00:00:00 2001
From: ying2212 <gohguanying@gmail.com>
Date: Thu, 14 May 2026 13:43:42 -0400
Subject: [PATCH 2/3] restructure skill with cleaner steps + instructions

---
 .claude/skills/astrodb-ingest-source/SKILL.md | 183 ++++++++++++++++++
 .../references/ingest_source_api.md           |  53 +++++
 .claude/skills/ingest-source/SKILL.md         | 177 -----------------
 3 files changed, 236 insertions(+), 177 deletions(-)
 create mode 100644 .claude/skills/astrodb-ingest-source/SKILL.md
 create mode 100644 .claude/skills/astrodb-ingest-source/references/ingest_source_api.md
 delete mode 100644 .claude/skills/ingest-source/SKILL.md

diff --git a/.claude/skills/astrodb-ingest-source/SKILL.md b/.claude/skills/astrodb-ingest-source/SKILL.md
new file mode 100644
index 0000000..4684a80
--- /dev/null
+++ b/.claude/skills/astrodb-ingest-source/SKILL.md
@@ -0,0 +1,183 @@
+---
+name: ingest-sources
+description: "Generate and run a Python script that ingests sources (astronomical objects) into an AstroDB Sources table from a data table. Use this skill when the user says: ingest sources, ingest objects, add new sources to the database, add objects to SIMPLE, or provides a FITS/CSV/ECSV file and wants to populate the Sources table. Works standalone or as the step after match-schema."
+compatibility: python, astropy, astrodb_utils, astroquery
+---
+
+# Ingest Sources Skill
+
+Generate and run a Python script that ingests rows from a data table into the `Sources`
+table of an AstroDB SQLite database using `astrodb_utils.sources.ingest_source`.
+
+Read `references/ingest_source_api.md` before starting — it has the full signature,
+parameter meanings, and common warnings with fixes.
+
+## Prerequisites
+
+1. **Database**: JSON data files + `database.toml` (astrodb-template-db layout).
+   If absent, run the `create-astrodb` skill first.
+2. **Packages**: `astrodb_utils`, `astropy`, `astroquery`
+3. **Data table**: FITS, CSV, ECSV, or any astropy-readable format, with at minimum
+   a source name column and a discovery reference column.
+4. **Publications populated**: every reference value must already exist in `Publications`.
+   If not, tell the user to run `ingest_publication` first.
+5. **Internet (recommended)**: used by `ingest_source` to query SIMBAD when RA/Dec
+   are not in the table.
+
+## Required Inputs
+1. Path to the data table file (CSV, ECSV, FITS, etc.)
+2. Path to `database.toml` — check in order:
+   1. A path the user explicitly stated in the conversation
+   2. `database.toml` in the current working directory (root of the project)
+   3. If not found, ask the user for the path before continuing
+
+---
+
+## Step 1: Load and inspect the data table
+
+```python
+from astropy.table import Table
+data = Table.read("path/to/file.fits")
+# If auto-detect fails: Table.read(..., format="fits")
+print(data.colnames)
+print(data[:3])
+```
+
+Show the user the **column names**, **dtypes**, and a **3-row preview**.
+
+---
+
+## Step 2: Confirm column mappings
+
+Show the actual column names from Step 1 — **never assume defaults like `source` or
+`ra_deg`**, since real catalogs use names like `Name`, `RA`, `Dec`, `object`, etc.
+
+Ask the user to confirm:
+
+| Role | Required? | Notes |
+|------|-----------|-------|
+| Source name | **Yes** | String column |
+| Discovery reference | **Yes** | Must already exist in `Publications` |
+| RA (decimal degrees) | No | If absent → SIMBAD fallback |
+| Dec (decimal degrees) | No | If absent → SIMBAD fallback |
+| Epoch | No | |
+| Equinox | No | |
+| Comment | No | |
+| Other reference | No | |
+
+After confirmation, take the first value in the reference column and use it as `{REF}`
+when naming the output script (e.g. `Burg24` → `tmp/ingest_Burg24_sources.py`).
+
+Example prompt to user:
+> The table has these columns: `Name, RA, Dec, Dist, Reference`
+> Which column is the source name? Which is the discovery reference?
+
+---
+
+## Step 3: Write `tmp/ingest_{REF}_sources.py`
+
+Fill in all values from Steps 1–2 and write the script to `tmp/ingest_{REF}_sources.py`.
+Every variable below must contain a real value — never write placeholder text to the file.
+
+```python
+import logging
+from astropy.table import Table
+from astrodb_utils import build_db_from_json
+from astrodb_utils.sources import ingest_source
+
+logging.getLogger("astrodb_utils").setLevel(logging.INFO)
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+logging.basicConfig(format="%(levelname)s - %(message)s")
+
+SAVE_DB = False  # set True only after a clean dry run
+
+# Adjust to match your project layout
+SCHEMA_PATH   = "tests/astrodb-template-db"
+DB_NAME       = "tests/astrodb-template-tests"
+SETTINGS_FILE = "database.toml"
+
+db = build_db_from_json(
+    settings_file=SETTINGS_FILE,
+    base_path=SCHEMA_PATH,
+    db_name=DB_NAME,
+)
+
+TABLE_PATH    = "path/to/file.fits"  # confirmed in Step 1
+SOURCE_COL    = "Name"               # confirmed in Step 2 — required
+REFERENCE_COL = "Reference"         # confirmed in Step 2 — required
+RA_COL        = "RA"                 # confirmed in Step 2 — set to None → SIMBAD fallback
+DEC_COL       = "Dec"               # confirmed in Step 2 — set to None → SIMBAD fallback
+EPOCH_COL     = None                 # optional — set to column name if present
+EQUINOX_COL   = None                 # optional — set to column name if present
+COMMENT_COL   = None                 # optional — set to column name if present
+OTHER_REF_COL = None                 # optional — set to column name if present
+
+data = Table.read(TABLE_PATH)
+logger.info(f"Loaded {len(data)} rows from {TABLE_PATH}")
+
+sources_added = sources_skipped = 0
+for row in data:
+    source_name = str(row[SOURCE_COL])
+    try:
+        ingest_source(
+            db,
+            source=source_name,
+            reference=str(row[REFERENCE_COL]),
+            ra=float(row[RA_COL]) if RA_COL else None,
+            dec=float(row[DEC_COL]) if DEC_COL else None,
+            epoch=str(row[EPOCH_COL]) if EPOCH_COL else None,
+            equinox=str(row[EQUINOX_COL]) if EQUINOX_COL else None,
+            other_reference=str(row[OTHER_REF_COL]) if OTHER_REF_COL else None,
+            comment=str(row[COMMENT_COL]) if COMMENT_COL else None,
+            raise_error=True,
+        )
+        sources_added += 1
+        logger.info(f"Ingested: {source_name}")
+    except Exception as e:
+        sources_skipped += 1
+        logger.warning(f"Skipping {source_name}: {e}")
+
+logger.info(f"Done: {sources_added} ingested, {sources_skipped} skipped out of {len(data)} rows")
+
+if SAVE_DB:
+    db.save_database(directory="data/")
+    logger.info("Database saved to data/")
+else:
+    logger.info("Dry run complete — NOT saved. Set SAVE_DB = True to persist.")
+```
+
+---
+
+## Step 4: Run the script
+
+Run `tmp/ingest_{REF}_sources.py` with `SAVE_DB = False`. Report:
+
+- How many sources were ingested successfully
+- Any rows skipped with their warning messages
+- Confirmation that the database was **not** saved
+
+See `references/ingest_source_api.md` for the common warnings table and how to fix each one.
+
+---
+
+## Step 5: Confirm and save
+
+After a successful dry run, ask the user:
+> Ingestion preview complete: **X** ingested, **Y** skipped out of **Z** rows.
+> Would you like to save these changes to the database? (Re-runs with `SAVE_DB = True`)
+
+**Never set `SAVE_DB = True` automatically** — only on explicit user confirmation.
+
+---
+
+## Key Behaviors
+
+1. **Missing RA/Dec**: if `RA_COL = None`, `ingest_source` queries SIMBAD automatically.
+   If SIMBAD has no match, that row is skipped with a warning.
+2. **Duplicate sources**: if a source already exists, `ingest_source` adds the new name
+   as an alternate in `Names` — it does not re-insert into `Sources`.
+3. **Missing reference**: `reference` must already be in `Publications` or ingestion fails.
+   Remind the user to run `ingest_publication` first.
+4. **Unicode dashes**: handled automatically by `ingest_source`
+   (en dash, em dash, minus sign, figure dash → `-`).
\ No newline at end of file
diff --git a/.claude/skills/astrodb-ingest-source/references/ingest_source_api.md b/.claude/skills/astrodb-ingest-source/references/ingest_source_api.md
new file mode 100644
index 0000000..eeab2a7
--- /dev/null
+++ b/.claude/skills/astrodb-ingest-source/references/ingest_source_api.md
@@ -0,0 +1,53 @@
+# astrodb_utils.sources API Reference
+
+Source of truth: https://github.com/astrodbtoolkit/astrodb_utils/blob/main/astrodb_utils/sources.py
+Do NOT copy sources.py into this skill — always use the installed package.
+
+---
+
+## ingest_source signature
+
+```python
+from astrodb_utils.sources import ingest_source
+
+ingest_source(
+    db,                             # astrodbkit Database object (from build_db_from_json)
+    source,                         # str — source name
+    reference: str,                 # str — must exist in Publications table
+    *,
+    ra: float = None,               # decimal degrees; None → SIMBAD lookup
+    dec: float = None,              # decimal degrees; None → SIMBAD lookup
+    epoch: str = None,              # e.g. "2000.0"
+    equinox: str = None,            # e.g. "J2000"
+    other_reference: str = None,
+    comment: str = None,
+    raise_error: bool = True,       # True = stop on error; False = warn and skip
+    search_db: bool = True,         # True = check for duplicates before inserting
+    ra_col_name: str = "ra_deg",    # column name in the DB Sources table for RA
+    dec_col_name: str = "dec_deg",  # column name in the DB Sources table for Dec
+    epoch_col_name: str = "epoch_year",
+    use_simbad: bool = True,        # query SIMBAD if RA/Dec missing or name unresolved
+)
+```
+
+Returns None. Side effects: inserts into `Sources` and `Names`.
+If source already exists: adds new name as alternate in `Names` only.
+
+---
+
+## ra_col_name / dec_col_name
+
+These are column names **in the database Sources table** — not in the input data file.
+Defaults (`ra_deg`, `dec_deg`) match astrodb-template-db. Only change if your DB schema differs.
+
+---
+
+## Common warnings and fixes
+
+| Warning | Cause | Fix |
+|---------|-------|-----|
+| `Discovery reference X missing or not in Publications` | Reference not in Publications | Run `ingest_publication` first |
+| `Coordinates needed and could not be retrieved from SIMBAD` | No RA/Dec + SIMBAD can't resolve name | Provide RA/Dec columns, or check source name spelling |
+| `More than one match for X` | Name resolves to multiple DB candidates | Investigate duplicates manually |
+| `No internet connection, not using Simbad` | SIMBAD unreachable | Provide RA/Dec explicitly |
+| `Coordinates do not match for X` | Provided RA/Dec >60 arcsec from DB entry | Check coordinate columns and units |
\ No newline at end of file
diff --git a/.claude/skills/ingest-source/SKILL.md b/.claude/skills/ingest-source/SKILL.md
deleted file mode 100644
index a2568eb..0000000
--- a/.claude/skills/ingest-source/SKILL.md
+++ /dev/null
@@ -1,177 +0,0 @@
----
-name: ingest-source
-description: Generate a Python ingest script that ingests sources (astronomical objects) into an AstroDB database from a parsed data table. Use this skill after match-schema has verified the source name, RA, Dec, and reference columns. Trigger when user says: "ingest sources", "ingest objects", "add new sources to database", or "ingest my data table".
-compatibility: python, astropy, astrodb_utils, astroquery
----
-
-# Ingest Sources Skill
-Generate a Python script that ingests rows from a data table into the `Sources` table of an AstroDB SQLite database using `astrodb_utils.sources.ingest_source`.
-
-## Prerequisites
-1. **A database file**: JSON data files with a `database.toml` settings file, following the astrodb-template-db structure. If the user doesn't have a database yet, run the `create-astrodb` skill first.
-2. **Installed packages**: `astrodb_utils`, `astropy`, `astroquery` 
-3. **A data table**: CSV, ECSV, FITS, or other astropy-readable format with at minimum: a source name column and a discovery reference column
-4. **Publications table populated**: every reference value must already exist in the `Publications` table. If not, tell user that the reference should be ingested first.
-5. **Internet access (recommended)**: used to query SIMBAD for coordinates when RA/Dec are missing
-
-## Required Inputs
-1. Path to the data table file (CSV, ECSV, FITS, etc.)
-2. Path to `database.toml` (the project config file generated by `create-astrodb`)
-
-## Instrutions
-
-### Step 1: Parse the data table
-Run the `parse-data-table` skill on the provided data file, then read the sidecar it produces:
-```python
-import json, os
- 
-sidecar = "tmp/astrodb-parse-result.json"
-meta = json.load(open(sidecar))
-file_path = meta["file_path"]   # path to the data table
-reader    = meta["reader"]      # "astropy" or "pandas"
-fmt       = meta["format"]      # astropy format hint, or None
-n_rows    = meta["n_rows"]      # number of rows
-```
-
-Load the table using the same reader that parse-data-table already verified:
- 
-```python
-from astropy.table import Table
-import pandas as pd
- 
-if reader == "astropy":
-    kwargs = {"format": fmt} if fmt else {}
-    data = Table.read(file_path, **kwargs)
-else:
-    data = getattr(pd, meta["pandas_method"])(file_path)
-```
-
-
-### Step 2: Confirm column mappings
-Show the user the column names from the parsed result and ask them to identify:
-- Which column is the **source name** (required)
-- Which column is **RA** in decimal degrees (optional)
-- Which column is **Dec** in decimal degrees (optional)
-- Which column is the **discovery reference** (required - must exist in Publications table)
-- Any optional columns: epoch, equinox, comment, other_reference
-
-Example prompt to user:
-> The table has these columns: `source, ra_deg, dec_deg, reference, epoch`
-> Which column is the source name? Which is the discovery reference?
-
-
-### Step 3: Generate ingest_sources.py
-Populate the script config with confirmed mappings and paths from the sidecar.
-Write the script to `tmp/ingest_sources.py`.
-
-### Step 4: Run the script
-Execute `tmp/ingest_sources.py` with `SAVE_DB = False` (dry run) and report:
-- How many sources were ingested successfully
-- Any rows skipped with warnings
-- That the database is still in preview mode
-
-### Step 5: Prompt to save
-After a successful dry run (no errors or only expected warnings), ask the user:
- 
-> Ingestion preview complete: X sources processed, Y added, Z skipped.
-> Would you like to save these changes to the database? (Sets SAVE_DB = True and re-runs)
- 
-Only re-run with `SAVE_DB = True` if the user explicitly confirms. Never save automatically.
-
-## Generated Script
-Write `tmp/ingest_sources.py` with the confirmed values filled in:
- 
-```python
-import logging
-from astropy.table import Table
-import pandas as pd
-from astrodb_utils import build_db_from_json
-from astrodb_utils.sources import ingest_source
- 
-# --- Logging ---
-astrodb_utils_logger = logging.getLogger("astrodb_utils")
-astrodb_utils_logger.setLevel(logging.INFO)
-logger = logging.getLogger("astrodb_utils.ingest_sources")
-logger.setLevel(logging.INFO)
- 
-# --- Configuration ---
-SAVE_DB = False  # set True only after dry run confirms no errors
- 
-# Load database — matches the structure created by the create-astrodb skill.
-# SCHEMA_PATH must point to the cloned schema repo (base_path in build_db_from_json).
-# If you don't have it yet, run:
-#   git clone https://github.com/astrodbtoolkit/astrodb-template-db.git tests/astrodb-template-db
-SCHEMA_PATH  = "tests/astrodb-template-db"        # cloned schema repo
-DB_NAME      = "tests/astrodb-template-tests"     # output .sqlite path (no extension)
-SETTINGS_FILE = "database.toml"                   # matches data_path and felis_path
- 
-db = build_db_from_json(
-    settings_file=SETTINGS_FILE,
-    base_path=SCHEMA_PATH,
-    db_name=DB_NAME,
-)
- 
-# --- Data table ---
-# Filled from parse-data-table sidecar (tmp/astrodb-parse-result.json)
-TABLE_PATH = "path/to/data_table.csv"  # fill in
-data = Table.read(TABLE_PATH)
-logger.info(f"Loaded {len(data)} rows from {TABLE_PATH}")
- 
-# --- Column mapping — filled from confirmed mappings in Step 2 ---
-SOURCE_COL    = "source"      # required
-RA_COL        = "ra_deg"      # set to None if not in table (SIMBAD fallback)
-DEC_COL       = "dec_deg"     # set to None if not in table (SIMBAD fallback)
-REFERENCE_COL = "reference"   # required — must exist in Publications table
- 
-# Optional columns — set to None if not present
-EPOCH_COL     = None
-EQUINOX_COL   = None
-COMMENT_COL   = None
-OTHER_REF_COL = None
- 
-# --- Ingest sources ---
-# raise_error=True: stop on first error (good for dry runs and development)
-# raise_error=False: skip bad rows and continue (good for bulk ingestion)
-sources_added = 0
-for row in data:
-    try:
-        ingest_source(
-            db,
-            source=row[SOURCE_COL],
-            reference=row[REFERENCE_COL],
-            ra=row[RA_COL] if RA_COL else None,
-            dec=row[DEC_COL] if DEC_COL else None,
-            epoch=str(row[EPOCH_COL]) if EPOCH_COL else None,
-            equinox=str(row[EQUINOX_COL]) if EQUINOX_COL else None,
-            other_reference=str(row[OTHER_REF_COL]) if OTHER_REF_COL else None,
-            comment=str(row[COMMENT_COL]) if COMMENT_COL else None,
-            raise_error=True,
-        )
-        sources_added += 1
-        logger.info(f"Ingested: {row[SOURCE_COL]}")
-    except Exception as e:
-        logger.warning(f"Skipping {row[SOURCE_COL]}: {e}")
-        continue
- 
-logger.info(f"Total sources ingested: {sources_added} / {len(data)}")
- 
-# --- Save database ---
-# Save path matches data_path in database.toml, consistent with create-astrodb skill.
-if SAVE_DB:
-    db.save_database(directory="data/")
-    logger.info("Database saved to data/")
-```
-
-## Key Behaviors
-1. **Missing RA/Dec**: if `RA_COL` or `DEC_COL` is `None`, `ingest_source` queries SIMBAD automatically. If SIMBAD has no match, that row is skipped with a warning.
-2. **Duplicate sources**: if a source already exists, `ingest_source` adds the new name as an alternate in the `Names` table. With `raise_error=False`, duplicates are skipped with a warning.
-3. **Missing reference**: `reference` must already be in `Publications` or ingestion fails. Remind the user to run `ingest_publication` first.
-4. **Unicode dashes**: handled automatically by `ingest_source` (en dash, em dash, minus sign, figure dash → `-`).
-5. **Column name defaults**: `ra_col_name="ra_deg"`, `dec_col_name="dec_deg"`, `epoch_col_name="epoch_year"` — these are the database column names, not the input table column names.
-
-## Output
-Report:
-1. Number of sources successfully ingested vs total rows
-2. Any skipped rows with reasons from the WARNING logs
-3. Confirm whether database was saved or is still in preview mode (`SAVE_DB = False`)
-

From bcaff60dde2e7e6b445fd93a73bc15cdfbc41c28 Mon Sep 17 00:00:00 2001
From: ying2212 <gohguanying@gmail.com>
Date: Mon, 18 May 2026 13:58:51 -0400
Subject: [PATCH 3/3] add DB schema column name mapping guidance

---
 .claude/skills/astrodb-ingest-source/SKILL.md | 71 ++++++++++++++-----
 .../references/ingest_source_api.md           |  2 +-
 2 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/.claude/skills/astrodb-ingest-source/SKILL.md b/.claude/skills/astrodb-ingest-source/SKILL.md
index 4684a80..e96e334 100644
--- a/.claude/skills/astrodb-ingest-source/SKILL.md
+++ b/.claude/skills/astrodb-ingest-source/SKILL.md
@@ -37,22 +37,22 @@ parameter meanings, and common warnings with fixes.
 
 ```python
 from astropy.table import Table
-data = Table.read("path/to/file.fits")
+data = Table.read("path/to/file.fits")  # astropy auto-detects the format for .fits, .csv, .ecsv
 # If auto-detect fails: Table.read(..., format="fits")
 print(data.colnames)
 print(data[:3])
 ```
 
-Show the user the **column names**, **dtypes**, and a **3-row preview**.
+Show the user the **column names**, **dtypes**, and a **3-row preview** so they can confirm the column mapping in the next step.
 
 ---
 
 ## Step 2: Confirm column mappings
+Ask the user to confirm two things: **(A) input file columns** and **(B) DB schema column names**.
 
-Show the actual column names from Step 1 — **never assume defaults like `source` or
-`ra_deg`**, since real catalogs use names like `Name`, `RA`, `Dec`, `object`, etc.
-
-Ask the user to confirm:
+### A. Input file columns
+ 
+Present the actual column names from Step 1 — **do not assume defaults**.
 
 | Role | Required? | Notes |
 |------|-----------|-------|
@@ -60,8 +60,8 @@ Ask the user to confirm:
 | Discovery reference | **Yes** | Must already exist in `Publications` |
 | RA (decimal degrees) | No | If absent → SIMBAD fallback |
 | Dec (decimal degrees) | No | If absent → SIMBAD fallback |
-| Epoch | No | |
-| Equinox | No | |
+| Epoch | No | e.g. `"2000.0"` |
+| Equinox | No | e.g. `"J2000"` |
 | Comment | No | |
 | Other reference | No | |
 
@@ -72,6 +72,27 @@ Example prompt to user:
 > The table has these columns: `Name, RA, Dec, Dist, Reference`
 > Which column is the source name? Which is the discovery reference?
 
+### B. DB schema column names
+ 
+These are the column names **in the database `Sources` table** — not the input file.
+They vary by database. **Always ask the user which DB they are targeting**, then use the
+known defaults for that DB:
+ 
+| Database | ra_col_name | dec_col_name | epoch_col_name |
+|----------|-------------|--------------|----------------|
+| astrodb-template-db | `ra_deg` | `dec_deg` | `epoch_year` |
+| SIMPLE-db | `ra` | `dec` | `epoch` |
+| Unknown | **ask the user** | **ask the user** | **ask the user** |
+ 
+To confirm for an unknown DB, check the schema with:
+```python
+print(db.metadata.tables["Sources"].columns.keys())
+```
+**Example prompt:**
+> Which database are you ingesting into — SIMPLE-db, astrodb-template-db, or another?
+> (This determines the column names used internally for RA, Dec, and epoch.)
+
+
 ---
 
 ## Step 3: Write `tmp/ingest_{REF}_sources.py`
@@ -103,19 +124,34 @@ db = build_db_from_json(
     db_name=DB_NAME,
 )
 
+# --- Load data table ---
 TABLE_PATH    = "path/to/file.fits"  # confirmed in Step 1
+data = Table.read(TABLE_PATH)
+logger.info(f"Loaded {len(data)} rows from {TABLE_PATH}")
+
+# --- Column mapping — filled from Step 2 confirmation ---
+# Use the ACTUAL column names from your file (not assumed defaults)
 SOURCE_COL    = "Name"               # confirmed in Step 2 — required
 REFERENCE_COL = "Reference"         # confirmed in Step 2 — required
 RA_COL        = "RA"                 # confirmed in Step 2 — set to None → SIMBAD fallback
 DEC_COL       = "Dec"               # confirmed in Step 2 — set to None → SIMBAD fallback
+
+# Optional columns — set to None if not present in table
 EPOCH_COL     = None                 # optional — set to column name if present
 EQUINOX_COL   = None                 # optional — set to column name if present
 COMMENT_COL   = None                 # optional — set to column name if present
 OTHER_REF_COL = None                 # optional — set to column name if present
 
-data = Table.read(TABLE_PATH)
-logger.info(f"Loaded {len(data)} rows from {TABLE_PATH}")
+# --- DB schema column names — confirmed in Step 2B ---
+# These are column names IN the database Sources table, not the input file.
+# astrodb-template-db defaults: ra_deg, dec_deg, epoch_year
+# SIMPLE-db uses:               ra,     dec,     epoch
+# To check your DB: print(db.metadata.tables["Sources"].columns.keys())
+RA_COL_NAME    = "ra_deg"
+DEC_COL_NAME   = "dec_deg"
+EPOCH_COL_NAME = "epoch_year"
 
+# --- Ingest loop ---
 sources_added = sources_skipped = 0
 for row in data:
     source_name = str(row[SOURCE_COL])
@@ -130,6 +166,9 @@ for row in data:
             equinox=str(row[EQUINOX_COL]) if EQUINOX_COL else None,
             other_reference=str(row[OTHER_REF_COL]) if OTHER_REF_COL else None,
             comment=str(row[COMMENT_COL]) if COMMENT_COL else None,
+            ra_col_name=RA_COL_NAME,
+            dec_col_name=DEC_COL_NAME,
+            epoch_col_name=EPOCH_COL_NAME,
             raise_error=True,
         )
         sources_added += 1
@@ -137,7 +176,7 @@ for row in data:
     except Exception as e:
         sources_skipped += 1
         logger.warning(f"Skipping {source_name}: {e}")
-
+ 
 logger.info(f"Done: {sources_added} ingested, {sources_skipped} skipped out of {len(data)} rows")
 
 if SAVE_DB:
@@ -159,7 +198,6 @@ Run `tmp/ingest_{REF}_sources.py` with `SAVE_DB = False`. Report:
 
 See `references/ingest_source_api.md` for the common warnings table and how to fix each one.
 
----
 
 ## Step 5: Confirm and save
 
@@ -169,10 +207,8 @@ After a successful dry run, ask the user:
 
 **Never set `SAVE_DB = True` automatically** — only on explicit user confirmation.
 
----
-
 ## Key Behaviors
-
+ 
 1. **Missing RA/Dec**: if `RA_COL = None`, `ingest_source` queries SIMBAD automatically.
    If SIMBAD has no match, that row is skipped with a warning.
 2. **Duplicate sources**: if a source already exists, `ingest_source` adds the new name
@@ -180,4 +216,7 @@ After a successful dry run, ask the user:
 3. **Missing reference**: `reference` must already be in `Publications` or ingestion fails.
    Remind the user to run `ingest_publication` first.
 4. **Unicode dashes**: handled automatically by `ingest_source`
-   (en dash, em dash, minus sign, figure dash → `-`).
\ No newline at end of file
+   (en dash, em dash, minus sign, figure dash → `-`).
+5. **DB schema column names**: defaults (`ra_deg`/`dec_deg`/`epoch_year`) match
+   astrodb-template-db. SIMPLE-db uses `ra`/`dec`/`epoch`. Wrong values cause every row
+   to fail and be skipped with a warning — always confirm the target DB in Step 2B.
\ No newline at end of file
diff --git a/.claude/skills/astrodb-ingest-source/references/ingest_source_api.md b/.claude/skills/astrodb-ingest-source/references/ingest_source_api.md
index eeab2a7..2708abc 100644
--- a/.claude/skills/astrodb-ingest-source/references/ingest_source_api.md
+++ b/.claude/skills/astrodb-ingest-source/references/ingest_source_api.md
@@ -40,7 +40,7 @@ If source already exists: adds new name as alternate in `Names` only.
 These are column names **in the database Sources table** — not in the input data file.
 Defaults (`ra_deg`, `dec_deg`) match astrodb-template-db. Only change if your DB schema differs.
 
----
+--- 
 
 ## Common warnings and fixes