55 python extract_metrics.py path/to/text_file.txt --model llama3.1:8b
66 python extract_metrics.py path/to/text_file.txt --output-dir results/
77
This script uses Ollama to extract structured data from preprocessed predator diet
99surveys, including species name, study date, location, and stomach content data.
1010"""
1111
2121
class PredatorDietMetrics(BaseModel):
    """Structured schema for extracted predator diet survey metrics.

    Every field defaults to None so that a value the model cannot locate in
    the source text is simply left unset instead of failing validation.
    """

    species_name: Optional[str] = Field(
        default=None,
        description="Scientific name of the predator species studied",
    )
    study_location: Optional[str] = Field(
        default=None,
        description="Geographic location where the study was conducted",
    )
    study_date: Optional[str] = Field(
        default=None,
        description="Year or date range when the study was conducted",
    )
    num_empty_stomachs: Optional[int] = Field(
        default=None,
        description="Number of predators with empty stomachs",
    )
    num_nonempty_stomachs: Optional[int] = Field(
        default=None,
        description="Number of predators with non-empty stomachs",
    )
    sample_size: Optional[int] = Field(
        default=None,
        description="Total number of predators surveyed",
    )
4931
5032
5133def extract_metrics_from_text (text : str , model : str = "llama3.1:8b" ) -> PredatorDietMetrics :
5234 """Extract structured metrics from text using Ollama.
53-
35+
5436 Args:
5537 text: Preprocessed text content from a scientific publication
5638 model: Name of the Ollama model to use
57-
39+
5840 Returns:
5941 PredatorDietMetrics object with extracted data
6042 """
@@ -94,24 +76,24 @@ def extract_metrics_from_text(text: str, model: str = "llama3.1:8b") -> Predator
9476 model = model ,
9577 format = PredatorDietMetrics .model_json_schema (),
9678 )
97-
79+
9880 metrics = PredatorDietMetrics .model_validate_json (response .message .content )
9981 return metrics
10082
10183
10284def validate_and_calculate (metrics : dict ) -> dict :
10385 """Validate extracted metrics and calculate derived values.
104-
86+
10587 Args:
10688 metrics: Dictionary of extracted metrics
107-
89+
10890 Returns:
10991 Dictionary with validated metrics and calculated fraction_feeding
11092 """
11193 empty = metrics .get ("num_empty_stomachs" )
11294 nonempty = metrics .get ("num_nonempty_stomachs" )
11395 sample = metrics .get ("sample_size" )
114-
96+
11597 # Validate and fix sample size if needed
11698 if empty is not None and nonempty is not None :
11799 calculated_sample = empty + nonempty
@@ -122,83 +104,64 @@ def validate_and_calculate(metrics: dict) -> dict:
122104 # LLM made an error, use calculated value
123105 metrics ["sample_size" ] = calculated_sample
124106 sample = calculated_sample
125-
107+
126108 # Calculate fraction of feeding predators
127109 fraction_feeding = None
128110 if nonempty is not None and sample is not None and sample > 0 :
129111 fraction_feeding = round (nonempty / sample , 4 )
130-
112+
131113 metrics ["fraction_feeding" ] = fraction_feeding
132-
114+
133115 return metrics
134116
135117
def main():
    """CLI entry point: read a text file, run LLM extraction, and save JSON results.

    Exits with status 1 (after printing an [ERROR] message to stderr) when the
    input file is missing/unreadable or the extraction step raises.
    """
    arg_parser = argparse.ArgumentParser(description="Extract predator diet metrics from preprocessed text using LLM")
    arg_parser.add_argument("text_file", type=str, help="Path to the preprocessed text file")
    arg_parser.add_argument("--model", type=str, default="llama3.1:8b", help="Ollama model to use (default: llama3.1:8b)")
    arg_parser.add_argument("--output-dir", type=str, default="data/results", help="Output directory for JSON results (default: data/results)")

    opts = arg_parser.parse_args()

    # Validate the input path up front so we fail before any expensive work.
    input_path = Path(opts.text_file)
    if not input_path.exists():
        print(f"[ERROR] File not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    try:
        text = input_path.read_text(encoding="utf-8")
    except Exception as e:
        print(f"[ERROR] Failed to read file: {e}", file=sys.stderr)
        sys.exit(1)

    # Run the LLM extraction; any failure here is fatal for a CLI run.
    print(f"Extracting metrics from {input_path.name}...", file=sys.stderr)
    try:
        extracted = extract_metrics_from_text(text, model=opts.model)
    except Exception as e:
        print(f"[ERROR] Extraction failed: {e}", file=sys.stderr)
        sys.exit(1)

    # Cross-check the raw counts and attach the derived fraction_feeding value.
    record = validate_and_calculate(extracted.model_dump())

    # Output file mirrors the input name: <input_stem>_results.json
    output_path = Path(opts.output_dir) / f"{input_path.stem}_results.json"

    # Persist the result, creating the output directory tree if needed.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"source_file": input_path.name, "metrics": record}, f, indent=2)

    print(f"Results saved to {output_path}", file=sys.stderr)


if __name__ == "__main__":
    main()
0 commit comments