|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +############################################################################################################### |
| 4 | +# In this example script, we want to extract the extra field "CAS" from the resources "Chemical Compounds" # |
| 5 | +# This can then be used to re-import the compounds in the compounds table and complete them with PubChem data # |
| 6 | +# The first step is to export all entries from the Admin panel, as CSV. This is what we will process. # |
| 7 | +############################################################################################################### |
| 8 | + |
| 9 | +import elabapi_python |
| 10 | +import argparse |
| 11 | +import csv |
| 12 | +import json |
| 13 | +import sys |
| 14 | + |
| 15 | +# use the locally defined client.py module to get the api_client object, fully configured and ready to be used to instantiate api objects |
| 16 | +from client import api_client |
| 17 | + |
| 18 | + |
# Items API wrapper built on the pre-configured api_client imported above.
# NOTE(review): `items_client` is not referenced by the rest of this script, which
# only processes a local CSV export — presumably boilerplate carried over from
# other examples; confirm before removing.
items_client = elabapi_python.ItemsApi(api_client)
| 20 | + |
def cas_from_metadata(metadata):
    """Return the CAS value stored in a decoded ``metadata`` object.

    The value is read from ``metadata["extra_fields"]["CAS"]["value"]``.

    Args:
        metadata: The object produced by ``json.loads`` on a row's
            ``metadata`` cell. Anything that is not a dict (``None``, a
            list, a string, ...) is treated as "no CAS".

    Returns:
        The CAS value, or ``None`` when any level of the expected structure
        is missing, has the wrong type, or the value itself is null.
    """
    if not isinstance(metadata, dict):
        return None
    extra_fields = metadata.get("extra_fields")
    # An empty collection may be serialized as a JSON list rather than an
    # object, so the type has to be checked before calling .get().
    if not isinstance(extra_fields, dict):
        return None
    cas_entry = extra_fields.get("CAS")
    if not isinstance(cas_entry, dict):
        return None
    return cas_entry.get("value")


def extract_cas_values(csv_path):
    """Collect the CAS extra-field values from a CSV export.

    Each row's ``metadata`` column is decoded as JSON. Rows whose metadata
    cannot be decoded, or that carry no CAS value, are reported on stderr
    and skipped.

    Args:
        csv_path: Path to the input CSV file (read as UTF-8).

    Returns:
        A list of CAS values in row order.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        ValueError: If the CSV has no ``metadata`` column (this includes a
            completely empty file, which has no header at all).
    """
    cas_values = []
    with open(csv_path, mode="r", newline="", encoding="utf-8") as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None when the file is empty; guard before using `in`.
        if "metadata" not in (reader.fieldnames or ()):
            raise ValueError("'metadata' column not found in CSV.")

        for i, row in enumerate(reader, start=1):
            raw = row["metadata"]
            try:
                metadata = json.loads(raw)
            except (json.JSONDecodeError, TypeError) as e:
                # TypeError covers short rows, where DictReader fills the
                # missing metadata cell with None.
                print(f"Row {i}: could not parse JSON: {e}", file=sys.stderr)
                continue

            print(f"Row {i} metadata:", metadata)
            cas_value = cas_from_metadata(metadata)
            if cas_value is None:
                print(f"Row {i}: no CAS field found", file=sys.stderr)
                continue
            print(f"Row {i} CAS value: {cas_value}")
            cas_values.append(cas_value)
    return cas_values


def write_cas_csv(cas_values, out_path):
    """Write ``cas_values`` to ``out_path`` as a one-column CSV.

    The file starts with a ``cas`` header row followed by one value per row.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    with open(out_path, mode="w", newline="", encoding="utf-8") as outcsv:
        writer = csv.writer(outcsv)
        writer.writerow(["cas"])
        writer.writerows([cas] for cas in cas_values)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 on success, 1 on any reported error.
    """
    parser = argparse.ArgumentParser(
        description="This script extracts the CAS extra field from an eLabFTW CSV export of a resources category and saves it into a new csv file."
    )
    parser.add_argument(
        "csv_path",
        type=str,
        help="Path to the input CSV file"
    )
    args = parser.parse_args(argv)

    try:
        cas_list = extract_cas_values(args.csv_path)
    except FileNotFoundError:
        # The parsed argument is `csv_path`; formatting a non-existent
        # attribute here would raise AttributeError and hide this message.
        print(f"Error: file '{args.csv_path}' does not exist.", file=sys.stderr)
        return 1
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    # Write the collected CAS values to the output CSV
    out_path = args.csv_path + "-cas.csv"
    try:
        write_cas_csv(cas_list, out_path)
    except OSError as e:
        print(f"Error writing to '{out_path}': {e}", file=sys.stderr)
        return 1
    print(f"Wrote {len(cas_list)} CAS values to {out_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
0 commit comments