diff --git a/docs/stru_validator.md b/docs/stru_validator.md new file mode 100644 index 0000000..af062be --- /dev/null +++ b/docs/stru_validator.md @@ -0,0 +1,514 @@ +# STRU File Validation Tool + +## Overview + +The `validate_stru` tool provides comprehensive validation for ABACUS STRU files, checking file structure, format correctness, physical validity, and providing detailed error messages with actionable suggestions for fixing issues. + +## Features + +- **File Structure Validation**: Checks for required sections (ATOMIC_SPECIES, LATTICE_CONSTANT, LATTICE_VECTORS, ATOMIC_POSITIONS) +- **Format Validation**: Validates data types, ranges, and formats for all sections +- **Consistency Checks**: Ensures consistency across sections (e.g., elements in ATOMIC_POSITIONS exist in ATOMIC_SPECIES) +- **Physical Validity**: Checks for physically plausible values (atom distances, cell volume, magnetic moments) +- **Detailed Error Messages**: Provides specific error locations and actionable fix suggestions +- **Strict Mode**: Optional mode that treats warnings as errors +- **File Reference Checking**: Optionally verifies that pseudopotential and orbital files exist + +## Usage + +### Basic Usage + +```python +from abacusagent.modules.submodules.stru_validator import validate_stru + +# Validate a STRU file +result = validate_stru("STRU") + +if result['valid']: + print("✓ STRU file is valid") +else: + print("✗ Validation failed:") + for error in result['errors']: + print(f" {error}") +``` + +### MCP Tool Usage + +When using through the MCP server: + +```python +# The tool is automatically registered as an MCP tool +# and can be called by LLMs through the MCP protocol + +result = validate_stru( + stru_file="path/to/STRU", + check_file_existence=True, + strict_mode=False +) +``` + +### Parameters + +- **stru_file** (str): Path to the STRU file to validate (relative or absolute) +- **check_file_existence** (bool, default=True): Whether to check if referenced pseudopotential and 
orbital files exist +- **strict_mode** (bool, default=False): If True, treat warnings as errors and fail validation + +### Return Value + +Returns a dictionary with the following structure: + +```python +{ + "valid": bool, # Overall validation status + "errors": List[str], # Critical issues (must fix) + "warnings": List[str], # Potential issues (should review) + "suggestions": List[str], # Improvement recommendations + "summary": str, # Human-readable summary + "details": { # Detailed results by category + "file_structure": {...}, + "atomic_species": {...}, + "numerical_orbital": {...}, + "lattice_constant": {...}, + "lattice_vectors": {...}, + "atomic_positions": {...}, + "consistency": {...}, + "physical_validity": {...} + } +} +``` + +## Validation Categories + +### 1. File Structure +- Checks for presence of all required sections +- Validates section ordering (warnings only) + +### 2. ATOMIC_SPECIES +- At least one element defined +- Valid element labels (no duplicates) +- Positive masses +- Pseudopotential file references + +### 3. NUMERICAL_ORBITAL (if present) +- Number of orbital files matches number of elements +- Orbital file references + +### 4. LATTICE_CONSTANT +- Single positive float value +- Reasonable range (warns if < 0.1 or > 100 Angstrom) + +### 5. LATTICE_VECTORS +- Exactly 3 vectors with 3 components each +- Non-singular cell matrix (determinant ≠ 0) +- Reasonable cell volume + +### 6. ATOMIC_POSITIONS +- Valid coordinate type (Direct, Cartesian, Cartesian_angstrom, Cartesian_au, Cartesian_angstrom_center_xy/xz/yz/xyz) +- At least 3 coordinates (x, y, z) per atom +- Direct coordinates typically in [0, 1] (warning if outside) +- Optional atom attributes (see Atom Attributes section below) + +### 7. Consistency +- All elements in ATOMIC_POSITIONS exist in ATOMIC_SPECIES +- No duplicate element blocks +- Total atom count > 0 + +### 8. 
Physical Validity +- Atoms not too close together (< 0.00053 Angstrom / 1e-3 Bohr) +- Reasonable magnetic moments (|mag| < 10) + +## Atom Attributes + +The validator supports parsing and validation of optional atom attributes that can appear after atomic coordinates in the ATOMIC_POSITIONS section. These attributes match the ABACUS C++ implementation (read_atoms.cpp:206-316). + +### Supported Attributes + +#### 1. Movement Constraints +Controls which directions an atom can move during relaxation/MD. + +**New format (recommended):** +``` +0.0 0.0 0.0 m 1 1 0 +``` +- `m`: keyword +- Three values: 0 (frozen) or 1 (movable) for x, y, z directions + +**Old format (deprecated):** +``` +0.0 0.0 0.0 0 0 1 +``` +- Three numeric values immediately after coordinates +- Still supported but triggers deprecation warning + +**Validation:** +- Values must be 0 or 1 +- Deprecation warning for old format + +#### 2. Velocities +Initial velocities for molecular dynamics. + +**Format:** +``` +0.0 0.0 0.0 v 1.0 2.0 3.0 +``` +or +``` +0.0 0.0 0.0 vel 1.0 2.0 3.0 +0.0 0.0 0.0 velocity 1.0 2.0 3.0 +``` +- Keywords: `v`, `vel`, or `velocity` +- Three float values for vx, vy, vz + +**Validation:** +- Must have exactly 3 numeric values + +#### 3. Magnetic Moments +Initial magnetic moments for spin-polarized calculations. + +**Scalar format (z-component only):** +``` +0.0 0.0 0.0 mag 2.0 +``` + +**Vector format (x, y, z components):** +``` +0.0 0.0 0.0 mag 1.0 2.0 3.0 +``` +- Keywords: `mag` or `magmom` +- 1 value (scalar) or 3 values (vector) + +**Validation:** +- Cannot use both vector magnetic moment and angles on same atom (ERROR) + +#### 4. Angles +Alternative way to specify magnetic moment direction using spherical coordinates. + +**Format:** +``` +0.0 0.0 0.0 angle1 45.0 angle2 90.0 +``` +- `angle1`: polar angle (degrees) +- `angle2`: azimuthal angle (degrees) + +**Validation:** +- Warning if outside [-360, 360] degrees +- Cannot use with vector magnetic moment (ERROR) + +#### 5. 
Lambda Parameters (DFT+U) +Hubbard U parameters for DFT+U calculations. + +**Scalar format (z-component only):** +``` +0.0 0.0 0.0 lambda 0.5 +``` + +**Vector format (x, y, z components):** +``` +0.0 0.0 0.0 lambda 0.1 0.2 0.3 +``` +- Keyword: `lambda` +- 1 value (scalar) or 3 values (vector) + +#### 6. Spin Constraints +Constrain spin direction during calculations. + +**Scalar format (z-component only):** +``` +0.0 0.0 0.0 sc 1.0 +``` + +**Vector format (x, y, z components):** +``` +0.0 0.0 0.0 sc 0.1 0.2 0.3 +``` +- Keyword: `sc` +- 1 value (scalar) or 3 values (vector) + +### Multiple Attributes + +Multiple attributes can be specified on the same line: + +``` +0.0 0.0 0.0 m 1 1 0 v 0.1 0.2 0.3 mag 2.0 +``` + +### Comments + +Attributes support inline comments: + +``` +0.0 0.0 0.0 m 1 1 0 mag 2.0 # frozen in xy, mag moment 2.0 +``` + +### Validation Results + +Attribute validation results are included in the `details["atomic_positions"]["attributes"]` section: + +```python +{ + "total_atoms_with_attributes": int, + "movement_constraints": { + "count": int, + "old_format_count": int, + "new_format_count": int + }, + "velocities": { + "count": int + }, + "magnetic_moments": { + "scalar_count": int, + "vector_count": int, + "angle_count": int, + "conflicts": [] # Atoms with both vector mag and angles + }, + "lambda_parameters": { + "scalar_count": int, + "vector_count": int + }, + "spin_constraints": { + "scalar_count": int, + "vector_count": int + }, + "issues": [] +} +``` + +### Attribute Validation Errors + +#### Invalid Movement Values +``` +ERROR: [ATOMIC_POSITIONS] Invalid movement constraint values + Element: H, Atom: 1 + Values: 1 2 0 + Expected: Each value must be 0 (frozen) or 1 (movable) + Fix: Use 0 to freeze or 1 to allow movement in each direction +``` + +#### Conflicting Magnetic Specifications +``` +ERROR: [ATOMIC_POSITIONS] Conflicting magnetic moment specifications + Element: H, Atom: 1 + Found: Vector magnetic moment AND angles + Fix: Use either 
vector magnetic moment (mag x y z) OR angles (angle1/angle2), not both +``` + +#### Angle Out of Range +``` +WARNING: [ATOMIC_POSITIONS] Angle outside reasonable range + Element: H, Atom: 1 + angle1: 500.0 degrees + Reasonable range: [-360.0, 360.0] + Suggestion: Verify this is the intended value +``` + +#### Deprecated Format +``` +WARNING: [ATOMIC_POSITIONS] Deprecated movement constraint format + Format: Numeric values after coordinates (e.g., '0 0 1') + Suggestion: Use new keyword format: 'm 0 0 1' + Note: Old format still works but may be removed in future versions +``` + +### Example: Complete STRU with Attributes + +``` +ATOMIC_SPECIES +Ni 58.693 Ni_ONCV_PBE-1.0.upf +O 15.999 O_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +1.889726 + +LATTICE_VECTORS +4.17 2.085 2.085 +2.085 4.17 2.085 +2.085 2.085 4.17 + +ATOMIC_POSITIONS +Direct + +Ni +0.0 +2 +0.0 0.0 0.0 m 0 0 1 mag 2.0 +0.5 0.5 0.5 m 1 1 1 mag -2.0 + +O +0.0 +2 +0.25 0.25 0.25 m 0 0 0 mag 0.0 +0.75 0.75 0.75 m 1 0 1 mag 0.0 +``` + +This file will validate successfully with: +- 4 movement constraints (new format) +- 4 scalar magnetic moments +- 1 deprecation warning if old format is used + + + +## Examples + +### Example 1: Basic Validation + +```python +result = validate_stru("STRU") + +if result['valid']: + print(f"✓ {result['summary']}") + if result['warnings']: + print(f"\nWarnings: {len(result['warnings'])}") + for warning in result['warnings']: + print(f" {warning}") +``` + +### Example 2: Strict Mode + +```python +# Strict mode treats warnings as errors +result = validate_stru("STRU", strict_mode=True) + +if not result['valid']: + print("Validation failed in strict mode") + print(f"Errors: {len(result['errors'])}") + print(f"Warnings: {len(result['warnings'])}") +``` + +### Example 3: Skip File Existence Checks + +```python +# Useful when pseudopotential/orbital files are in a different location +result = validate_stru("STRU", check_file_existence=False) +``` + +### Example 4: Detailed Information + 
+```python +result = validate_stru("STRU") + +# Access detailed validation results +print("Lattice constant:", result['details']['lattice_constant']['value']) +print("Cell volume:", result['details']['lattice_vectors']['volume']) +print("Total atoms:", result['details']['consistency']['total_atoms']) + +# Check specific sections +for elem in result['details']['atomic_species']['elements']: + print(f"Element {elem['label']}: mass={elem['mass']}") +``` + +## Error Message Format + +### Errors (Critical Issues) + +``` +ERROR: [Section] Description + Location: Line X or Section Y + Found: + Expected: + Fix: +``` + +### Warnings (Potential Issues) + +``` +WARNING: [Section] Description + Location: Line X or Section Y + Details: + Suggestion: +``` + +### Suggestions (Improvements) + +``` +SUGGESTION: + Reason: +``` + +## Common Validation Errors + +### Missing Required Section + +``` +ERROR: [File Structure] Required section missing + Section: LATTICE_VECTORS + Fix: Add the LATTICE_VECTORS section to the STRU file +``` + +### Duplicate Element Labels + +``` +ERROR: [ATOMIC_SPECIES] Duplicate element label + Label: Ga + Fix: Each element label must be unique +``` + +### Singular Cell Matrix + +``` +ERROR: [LATTICE_VECTORS] Singular cell matrix + Determinant: 0.0 + Fix: Lattice vectors must be linearly independent +``` + +### Element Mismatch + +``` +ERROR: [Consistency] Element in ATOMIC_POSITIONS not in ATOMIC_SPECIES + Element: As + Fix: Add As to ATOMIC_SPECIES section +``` + +## Integration with ABACUS Workflows + +The validation tool is designed to be used before running ABACUS calculations: + +```python +# Validate STRU file before preparing calculation +result = validate_stru("STRU") + +if result['valid']: + # Proceed with ABACUS preparation + abacus_prepare(...) 
+else: + # Report errors to user + print("Please fix the following errors:") + for error in result['errors']: + print(error) +``` + +## Testing + +Run the unit tests: + +```bash +pytest tests/test_stru_validator.py -v +``` + +Run the example script: + +```bash +python examples/validate_stru_example.py +``` + +## Implementation Details + +- **Parser**: Uses `AbacusStru.ReadStru()` for valid files, falls back to manual parsing for invalid files +- **Error Handling**: Gracefully handles `sys.exit()` calls from `AbacusStru` to provide better error messages +- **Dual Format Support**: Handles both `AbacusStru` object format and manual parser format +- **Comprehensive Coverage**: Validates all major sections and common error cases + +## Limitations + +- File existence checks are relative to the STRU file directory +- Physical validity checks are heuristic-based (e.g., minimum atom distance threshold) +- Attribute parsing focuses on validation; runtime operations (unit conversions, default values) are handled by ABACUS + +## Future Enhancements + +Potential improvements for future versions: + +- Validation of advanced ABACUS features (DFT+U, vdW corrections, etc.) +- Integration with pseudopotential/orbital databases +- Automatic fixing of common issues +- Performance optimization for large STRU files +- Support for STRU file generation from validation results diff --git a/examples/validate_stru_example.py b/examples/validate_stru_example.py new file mode 100755 index 0000000..abca055 --- /dev/null +++ b/examples/validate_stru_example.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +Example script demonstrating the STRU file validation tool. + +This script shows how to use the validate_stru tool to check ABACUS STRU files +for correctness and physical validity. 
def print_validation_result(result, filename):
    """Print a validation result dictionary in a human-friendly layout.

    Args:
        result: Dictionary as returned by ``validate_stru``; the keys
            ``summary``, ``errors``, ``warnings`` and ``suggestions`` are read.
        filename: Name shown in the report header.
    """
    rule = "=" * 70
    dashes = "-" * 70

    print(f"\n{rule}")
    print(f"Validation Results for: {filename}")
    print(rule)

    print(f"\n{result['summary']}")

    # Errors and warnings share one layout: each entry followed by a blank line.
    for title, key in (("❌ ERRORS", "errors"), ("⚠️ WARNINGS", "warnings")):
        entries = result[key]
        if not entries:
            continue
        print(f"\n{title} ({len(entries)}):")
        print(dashes)
        for entry in entries:
            print(entry)
            print()

    # Suggestions are rendered as a compact bullet list instead.
    hints = result['suggestions']
    if hints:
        print(f"\n💡 SUGGESTIONS ({len(hints)}):")
        print(dashes)
        for hint in hints:
            print(f" • {hint}")

    print(f"\n{rule}\n")


def main():
    """Run the three validation examples against the repository's test STRU file."""
    rule = "=" * 70

    def banner(title):
        # Every example starts with the same three-line header.
        print("\n" + rule)
        print(title)
        print(rule)

    test_stru = os.path.join(os.path.dirname(__file__), '..', 'tests', 'abacus', 'STRU')

    # Example 1: Validate a valid STRU file
    banner("Example 1: Validating a correct STRU file")
    if not os.path.exists(test_stru):
        print(f"Test file not found: {test_stru}")
    else:
        report = validate_stru(test_stru, check_file_existence=False)
        print_validation_result(report, test_stru)

    # Example 2: Validate with strict mode
    banner("Example 2: Strict mode validation")
    print("Strict mode treats warnings as errors.\n")
    if os.path.exists(test_stru):
        strict_report = validate_stru(test_stru, check_file_existence=False, strict_mode=True)
        normal_report = validate_stru(test_stru, check_file_existence=False)
        print(f"Valid in normal mode: {normal_report['valid']}")
        print(f"Valid in strict mode: {strict_report['valid']}")

    # Example 3: Show detailed validation information
    banner("Example 3: Detailed validation information")
    if os.path.exists(test_stru):
        report = validate_stru(test_stru, check_file_existence=False)
        details = report['details']

        print("\nFile Structure:")
        print(f" Sections found: {', '.join(details['file_structure']['sections_found'])}")

        print("\nAtomic Species:")
        for species in details['atomic_species']['elements']:
            print(f" {species['label']}: mass={species.get('mass', 'N/A')}")

        print("\nLattice:")
        lattice_constant = details['lattice_constant']
        print(f" Constant: {lattice_constant['value']} Angstrom")

        lattice_vectors = details['lattice_vectors']
        if lattice_vectors.get('volume'):
            print(f" Volume: {lattice_vectors['volume']:.2f} Angstrom^3")

        print("\nAtomic Positions:")
        print(f" Coordinate type: {details['atomic_positions']['coordinate_type']}")
        for block in details['atomic_positions']['elements']:
            print(f" {block['label']}: {block['actual_count']} atoms")

        if 'total_atoms' in details['consistency']:
            print(f"\nTotal atoms: {details['consistency']['total_atoms']}")


if __name__ == '__main__':
    main()
@mcp.tool()
def validate_stru(
    stru_file: str,
    check_file_existence: bool = True,
    strict_mode: bool = False
) -> Dict[str, Any]:
    """
    Validate an ABACUS STRU file for correctness and physical validity.

    This tool performs comprehensive validation of ABACUS STRU files, checking:
    - File structure and required sections
    - ATOMIC_SPECIES format and validity
    - NUMERICAL_ORBITAL section (if present)
    - LATTICE_CONSTANT and LATTICE_VECTORS
    - ATOMIC_POSITIONS format and coordinates
    - Consistency across sections
    - Physical plausibility (atom distances, cell volume, etc.)

    Args:
        stru_file: Path to the STRU file to validate (relative or absolute)
        check_file_existence: Whether to check if referenced pseudopotential
            and orbital files exist (default: True). Set to False if files
            are in a different location or will be provided later.
        strict_mode: If True, treat warnings as errors and fail validation
            (default: False). Use this for strict validation before production runs.

    Returns:
        Dictionary containing:
        - valid (bool): Overall validation status (True if there are no errors
          and, when strict_mode is True, no warnings either)
        - errors (list): Critical issues that must be fixed
        - warnings (list): Potential issues that should be reviewed
        - suggestions (list): Improvement recommendations
        - summary (str): Human-readable summary of validation results
        - details (dict): Detailed results organized by category:
            - file_structure: Sections found and missing
            - atomic_species: Element definitions and validity
            - numerical_orbital: Orbital files (if present)
            - lattice_constant: Lattice constant value and validity
            - lattice_vectors: Cell matrix, determinant, and volume
            - atomic_positions: Coordinate type and atom positions
            - consistency: Cross-section consistency checks
            - physical_validity: Physical plausibility checks

    Example:
        >>> result = validate_stru("STRU")
        >>> if result['valid']:
        ...     print("STRU file is valid")
        ... else:
        ...     print("Validation failed:")
        ...     for error in result['errors']:
        ...         print(f" {error}")

        >>> # Strict mode - warnings cause failure
        >>> result = validate_stru("STRU", strict_mode=True)

        >>> # Skip file existence checks
        >>> result = validate_stru("STRU", check_file_existence=False)
    """
    # Thin MCP-facing wrapper: all validation logic lives in the submodule
    # implementation imported above as ``_validate_stru``.
    return _validate_stru(stru_file, check_file_existence, strict_mode)
# Constants matching C++ implementation
BOHR_TO_ANGSTROM = 0.529177249
MIN_DISTANCE_BOHR = 1.0e-3
MIN_DISTANCE_ANGSTROM = MIN_DISTANCE_BOHR * BOHR_TO_ANGSTROM  # ≈ 0.00053 Å

# Valid coordinate types (from read_atoms.cpp:35-55)
VALID_COORD_TYPES = [
    "Direct",
    "Cartesian",
    "Cartesian_angstrom",
    "Cartesian_au",
    "Cartesian_angstrom_center_xy",
    "Cartesian_angstrom_center_xz",
    "Cartesian_angstrom_center_yz",
    "Cartesian_angstrom_center_xyz"
]

# Valid pseudopotential types (from read_atom_species.cpp:52-66)
VALID_PP_TYPES = ["auto", "upf", "vwr", "upf201", "blps", "1/r"]

# Valid attribute keywords for atomic positions (from read_atoms.cpp:206-316)
VALID_MOVEMENT_KEYWORDS = ["m"]
VALID_VELOCITY_KEYWORDS = ["v", "vel", "velocity"]
VALID_MAG_KEYWORDS = ["mag", "magmom"]
VALID_ANGLE_KEYWORDS = ["angle1", "angle2"]
VALID_LAMBDA_KEYWORDS = ["lambda"]
VALID_SC_KEYWORDS = ["sc"]

ALL_VALID_ATTRIBUTES = (
    VALID_MOVEMENT_KEYWORDS +
    VALID_VELOCITY_KEYWORDS +
    VALID_MAG_KEYWORDS +
    VALID_ANGLE_KEYWORDS +
    VALID_LAMBDA_KEYWORDS +
    VALID_SC_KEYWORDS
)

# Angle range validation (degrees)
REASONABLE_ANGLE_MIN = -360.0
REASONABLE_ANGLE_MAX = 360.0


class ValidationResult:
    """Accumulate the errors, warnings, suggestions and details of one validation run."""

    def __init__(self):
        self.errors: List[str] = []
        self.warnings: List[str] = []
        self.suggestions: List[str] = []
        # Per-category data stored by the individual checks
        # (e.g. details["file_structure"]).
        self.details: Dict[str, Any] = {}

    def add_error(self, message: str, section: str = "general"):
        """Record a critical error.

        ``section`` is accepted for call-site readability; it is not stored.
        """
        self.errors.append(message)

    def add_warning(self, message: str, section: str = "general"):
        """Record a warning.

        ``section`` is accepted for call-site readability; it is not stored.
        """
        self.warnings.append(message)

    def add_suggestion(self, message: str):
        """Record an improvement suggestion."""
        self.suggestions.append(message)

    def is_valid(self, strict_mode: bool = False) -> bool:
        """Return True when there are no errors (and, in strict mode, no warnings)."""
        if self.errors:
            return False
        return not (strict_mode and self.warnings)

    def to_dict(self, strict_mode: bool = False) -> Dict[str, Any]:
        """Return the result as a plain dictionary.

        Keys: ``valid``, ``errors``, ``warnings``, ``suggestions``, ``summary``
        and ``details``. The lists and the details dict are the live objects
        held by this instance, not copies.
        """
        valid = self.is_valid(strict_mode)

        if valid:
            summary = "✓ STRU file is valid"
            if self.warnings:
                summary += f" ({len(self.warnings)} warning(s))"
        else:
            summary = f"✗ Validation failed: {len(self.errors)} error(s)"
            if self.warnings:
                summary += f", {len(self.warnings)} warning(s)"
            if not self.errors:
                # Only strict mode can fail without errors; say so instead of
                # leaving an unexplained "0 error(s)" failure.
                summary += " (strict mode: warnings treated as errors)"

        return {
            "valid": valid,
            "errors": self.errors,
            "warnings": self.warnings,
            "suggestions": self.suggestions,
            "summary": summary,
            "details": self.details
        }


def validate_stru(
    stru_file: str,
    check_file_existence: bool = True,
    strict_mode: bool = False
) -> Dict[str, Any]:
    """
    Validate an ABACUS STRU file.

    The file is first checked for required sections and duplicate species
    labels; if that already produced errors the function returns early.
    Otherwise the file is parsed with ``AbacusStru.ReadStru`` (falling back to
    the manual parser when that fails) and every section check is run.

    Args:
        stru_file: Path to STRU file to validate
        check_file_existence: Whether to check if referenced PP/orbital files exist
        strict_mode: Treat warnings as errors

    Returns:
        Dictionary with validation results including:
        - valid: Overall validation status
        - errors: List of critical issues
        - warnings: List of potential issues
        - suggestions: List of improvement recommendations
        - summary: Human-readable summary
        - details: Detailed results by category
    """
    import contextlib
    import io

    result = ValidationResult()
    stru_path = Path(stru_file)

    # Check file exists
    if not stru_path.exists():
        result.add_error(
            f"ERROR: [File] STRU file not found\n"
            f" Location: {stru_file}\n"
            f" Fix: Check the file path is correct"
        )
        return result.to_dict(strict_mode)

    # Read the file. The encoding is pinned so the outcome does not depend on
    # the platform's locale; undecodable input is reported as a read error.
    try:
        with open(stru_path, 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')
    except Exception as e:
        result.add_error(
            f"ERROR: [File] Cannot read STRU file\n"
            f" Location: {stru_file}\n"
            f" Error: {str(e)}\n"
            f" Fix: Check file permissions and encoding"
        )
        return result.to_dict(strict_mode)

    # Validate file structure first
    _validate_file_structure(lines, result)

    # Check for duplicate elements in raw file (before AbacusStru deduplicates)
    _check_duplicate_elements(lines, result)

    # If critical structure errors, don't continue parsing
    if result.errors:
        return result.to_dict(strict_mode)

    # Try to parse using AbacusStru for valid files. Its console output is
    # discarded; the redirect context managers restore both streams even when
    # parsing raises.
    stru = None
    try:
        with contextlib.redirect_stdout(io.StringIO()), \
                contextlib.redirect_stderr(io.StringIO()):
            stru = AbacusStru.ReadStru(str(stru_path))
    except (SystemExit, Exception):
        # AbacusStru calls sys.exit() on errors; any failure here simply
        # means we fall back to the manual parser below.
        stru = None

    if stru is None:
        # AbacusStru parsing failed, do manual parsing
        stru = _manual_parse_stru(lines, result)
        if stru is None:
            return result.to_dict(strict_mode)
    else:
        # AbacusStru parsed successfully, but we still need to parse attributes
        # since AbacusStru doesn't extract them
        _parse_attributes_from_lines(stru, lines)

    # Referenced files are resolved against the STRU file's directory; None
    # disables the existence checks.
    base_path = stru_path.parent if check_file_existence else None

    # Validate each section
    _validate_atomic_species(stru, lines, result, base_path)
    _validate_numerical_orbital(stru, lines, result, base_path)
    _validate_lattice_constant(stru, lines, result)
    _validate_lattice_vectors(stru, lines, result)
    _validate_atomic_positions(stru, lines, result)
    _validate_atom_attributes(stru, lines, result)
    _validate_consistency(stru, result)
    _validate_physical(stru, result)

    return result.to_dict(strict_mode)


def strip_comments(line: str) -> str:
    """Return *line* without its ``#`` comment and surrounding whitespace."""
    return line.partition('#')[0].strip()
def _manual_section_start(lines, keyword):
    """Return the index of the first line containing *keyword*, or ``None``."""
    return next((idx for idx, text in enumerate(lines) if keyword in text), None)


def _manual_store_attributes(stru, elem, attrs):
    """Append one atom's parsed optional attributes to the per-element lists on *stru*."""
    if attrs['movement'] is not None:
        stru.movement_constraints.setdefault(elem, []).append(attrs['movement'])
        if attrs['old_style_movement']:
            stru.has_old_style_movement = True

    if attrs['velocity'] is not None:
        stru.velocities.setdefault(elem, []).append(attrs['velocity'])

    if attrs['mag_scalar'] is not None:
        stru.mag_scalar.setdefault(elem, []).append(attrs['mag_scalar'])
    if attrs['mag_vector'] is not None:
        stru.mag_vector.setdefault(elem, []).append(attrs['mag_vector'])

    # An angle pair is stored as soon as either angle is given.
    if attrs['angle1'] is not None or attrs['angle2'] is not None:
        stru.angles.setdefault(elem, []).append((attrs['angle1'], attrs['angle2']))

    # Scalar and vector forms share one list per element.
    if attrs['lambda_scalar'] is not None:
        stru.lambda_params.setdefault(elem, []).append(attrs['lambda_scalar'])
    if attrs['lambda_vector'] is not None:
        stru.lambda_params.setdefault(elem, []).append(attrs['lambda_vector'])

    if attrs['sc_scalar'] is not None:
        stru.spin_constraints.setdefault(elem, []).append(attrs['sc_scalar'])
    if attrs['sc_vector'] is not None:
        stru.spin_constraints.setdefault(elem, []).append(attrs['sc_vector'])


def _manual_parse_stru(lines: List[str], result: "ValidationResult"):
    """
    Manually parse STRU file when AbacusStru.ReadStru() fails.

    Parsing is best-effort: each section is parsed independently and a section
    that cannot be parsed simply leaves its fields at their defaults (or
    partially filled), so the later validators can report what is wrong.

    Args:
        lines: The STRU file content split into lines.
        result: Validation result accumulator. Currently not used by this
            function; nothing is recorded on it here.

    Returns:
        A minimal structure object (``ManualStru``) for validation. This
        function always returns an object, never ``None``.
    """
    class ManualStru:
        def __init__(self):
            self.elements = []
            self.masses = {}
            self.pp_files = {}
            self.pp_types = {}
            self.orb_files = {}
            self.lat0 = None
            self.cells = None
            self.coords_type = None
            self.coords = {}
            self.magmoms = {}
            self.empty_elements = []

            # New fields for optional attributes
            self.movement_constraints = {}  # {element: [(mx, my, mz), ...]}
            self.velocities = {}  # {element: [(vx, vy, vz), ...]}
            self.mag_scalar = {}  # {element: [mag, ...]}
            self.mag_vector = {}  # {element: [(mx, my, mz), ...]}
            self.angles = {}  # {element: [(angle1, angle2), ...]}
            self.lambda_params = {}  # {element: [scalar or (x,y,z), ...]}
            self.spin_constraints = {}  # {element: [scalar or (x,y,z), ...]}
            self.has_old_style_movement = False  # Track deprecated format

    stru = ManualStru()

    # Parse ATOMIC_SPECIES: "<label> <mass> <pp_file> [pp_type]" per line.
    start = _manual_section_start(lines, "ATOMIC_SPECIES")
    if start is not None:
        next_sections = ("NUMERICAL_ORBITAL", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS")
        try:
            for raw_line in lines[start + 1:]:
                line = strip_comments(raw_line)
                if not line:
                    continue
                if any(name in line for name in next_sections):
                    break
                parts = line.split()
                if len(parts) < 3:
                    continue
                label = parts[0]
                # Always append to preserve duplicates for validation
                stru.elements.append(label)
                # Store in dict (will overwrite if duplicate, but we keep list for detection)
                stru.masses[label] = float(parts[1])
                stru.pp_files[label] = parts[2]
                # PP type (4th column, optional)
                if len(parts) >= 4:
                    stru.pp_types[label] = parts[3]
                # Empty element (BSSE)
                if "empty" in label.lower():
                    stru.empty_elements.append(label)
        except Exception:
            # Deliberate best-effort: keep whatever was parsed so far.
            pass

    # Parse LATTICE_CONSTANT: first token of the first non-empty line.
    start = _manual_section_start(lines, "LATTICE_CONSTANT")
    if start is not None:
        try:
            for raw_line in lines[start + 1:]:
                line = strip_comments(raw_line)
                if line:
                    stru.lat0 = float(line.split()[0])
                    break
        except Exception:
            pass

    # Parse LATTICE_VECTORS: the first three lines with at least 3 tokens.
    start = _manual_section_start(lines, "LATTICE_VECTORS")
    if start is not None:
        try:
            vectors = []
            for raw_line in lines[start + 1:]:
                if len(vectors) == 3:
                    break
                parts = strip_comments(raw_line).split()
                if len(parts) >= 3:
                    vectors.append([float(value) for value in parts[:3]])
            if len(vectors) == 3:
                stru.cells = vectors
        except Exception:
            pass

    # Parse ATOMIC_POSITIONS: coordinate type, then per-element blocks of
    # label / magnetism / atom count / coordinate lines.
    start = _manual_section_start(lines, "ATOMIC_POSITIONS")
    if start is not None:
        try:
            stru.coords_type = strip_comments(lines[start + 1]).split()[0]

            total = len(lines)
            i = start + 2
            while i < total:
                parts = strip_comments(lines[i]).split()
                # Only a single word on a line is treated as an element label;
                # blank lines and anything else are skipped.
                if len(parts) != 1:
                    i += 1
                    continue

                elem = parts[0]
                i += 1

                # Read magnetism; the line is consumed even if it is not a number.
                if i < total:
                    try:
                        float(strip_comments(lines[i]).split()[0])
                    except (ValueError, IndexError):
                        pass
                    else:
                        stru.magmoms[elem] = []
                    i += 1

                # Read atom count followed by that many coordinate lines.
                if i < total:
                    try:
                        count = int(strip_comments(lines[i]).split()[0])
                        stru.coords[elem] = []
                        i += 1
                        for _ in range(count):
                            if i >= total:
                                # Declared count runs past end of file.
                                break
                            coord_line = strip_comments(lines[i])
                            coord_parts = coord_line.split()
                            if len(coord_parts) >= 3:
                                stru.coords[elem].append(
                                    [float(value) for value in coord_parts[:3]]
                                )
                                # Parse optional attributes
                                attrs = _parse_atom_attributes(coord_line, coord_parts)
                                if attrs:
                                    _manual_store_attributes(stru, elem, attrs)
                            i += 1
                    except Exception:
                        # Skip the offending line and resume scanning. This is
                        # not a bare ``except`` so KeyboardInterrupt and
                        # SystemExit still propagate.
                        i += 1
        except Exception:
            pass

    return stru
def _check_duplicate_elements(lines: List[str], result: ValidationResult):
    """Report every element label that is repeated inside ATOMIC_SPECIES.

    The raw lines are scanned because a parsed structure may already have
    merged or dropped repeated labels.  Scanning stops at the first line that
    mentions another known section keyword.

    Args:
        lines: Raw lines of the STRU file.
        result: Collector that receives one error per repeated label.
    """
    content = '\n'.join(lines)
    if "ATOMIC_SPECIES" not in content:
        return

    other_sections = ("NUMERICAL_ORBITAL", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS")

    try:
        header_idx = next(n for n, row in enumerate(lines) if "ATOMIC_SPECIES" in row)
        known_labels = set()

        for raw_row in lines[header_idx + 1:]:
            row = strip_comments(raw_row).strip()
            if not row:
                continue
            if any(keyword in row for keyword in other_sections):
                break

            fields = row.split()
            if len(fields) < 3:
                # Not a "label mass pseudopotential" entry; ignore it.
                continue

            tag = fields[0]
            if tag in known_labels:
                result.add_error(
                    f"ERROR: [ATOMIC_SPECIES] Duplicate element label\n"
                    f" Label: {tag}\n"
                    f" Fix: Each element label must be unique"
                )
            known_labels.add(tag)
    except Exception:
        # Best effort: a failure here must not abort the remaining checks.
        pass


def _validate_file_structure(lines: List[str], result: ValidationResult):
    """Check that all four mandatory section keywords occur in the file.

    A section counts as present when its keyword appears anywhere in the
    text.  The outcome is stored under ``result.details["file_structure"]``
    and every missing section additionally produces an error.

    Args:
        lines: Raw lines of the STRU file.
        result: Collector for errors and details.
    """
    text = '\n'.join(lines)
    found = []
    missing = []

    for section in ("ATOMIC_SPECIES", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS"):
        if section in text:
            found.append(section)
            continue
        missing.append(section)
        result.add_error(
            f"ERROR: [File Structure] Required section missing\n"
            f" Section: {section}\n"
            f" Fix: Add the {section} section to the STRU file"
        )

    result.details["file_structure"] = {"sections_found": found, "sections_missing": missing}
def _species_pp_types(lines: List[str]) -> Dict[str, str]:
    """Map each ATOMIC_SPECIES label to the optional 4th column of its entry."""
    pp_types: Dict[str, str] = {}
    if "ATOMIC_SPECIES" not in '\n'.join(lines):
        return pp_types

    try:
        start_idx = next(i for i, line in enumerate(lines) if "ATOMIC_SPECIES" in line)
        for raw_line in lines[start_idx + 1:]:
            line = strip_comments(raw_line).strip()
            if not line:
                continue
            if any(section in line for section in
                   ("NUMERICAL_ORBITAL", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS")):
                break
            parts = line.split()
            if len(parts) >= 4:
                pp_types[parts[0]] = parts[3]
    except Exception:
        # Best effort: keep whatever was collected before the failure.
        pass
    return pp_types


def _per_species_value(values: Any, label: str, index: int) -> Any:
    """Return the entry for one species from a label-keyed dict or a positional list/tuple."""
    if isinstance(values, dict):
        return values.get(label)
    if isinstance(values, (list, tuple)) and index < len(values):
        return values[index]
    return None


def _validate_atomic_species(
    stru: AbacusStru,
    lines: List[str],
    result: ValidationResult,
    base_path: Optional[Path] = None
):
    """Validate ATOMIC_SPECIES section.

    Checks, per species: unique label, positive finite mass, existence of the
    pseudopotential file (only when ``base_path`` is given) and the optional
    pseudopotential type column read from the raw lines.  Labels containing
    "empty" produce a BSSE suggestion.

    Args:
        stru: Parsed structure; read through ``_element``/``elements``,
            ``_mass``/``masses``, ``_pp``/``pp_files`` and ``_label``.
        lines: Raw lines of the STRU file (source of the PP type column).
        result: Collector for errors, warnings, suggestions and details.
        base_path: Directory against which pseudopotential paths are resolved;
            ``None`` disables the file-existence check.
    """
    details = {"elements": [], "issues": [], "empty_elements": []}

    elements = getattr(stru, '_element', None) or getattr(stru, 'elements', None)
    if not elements:
        result.add_error(
            f"ERROR: [ATOMIC_SPECIES] No elements defined\n"
            f" Fix: Add at least one element to ATOMIC_SPECIES section"
        )
        result.details["atomic_species"] = details
        return

    masses = getattr(stru, '_mass', None) or getattr(stru, 'masses', None)
    pp_files = getattr(stru, '_pp', None) or getattr(stru, 'pp_files', None)

    # Labels may differ from elements (e.g. "H_empty" -> "H"); prefer them.
    labels = getattr(stru, '_label', None)

    # The PP type column is taken from the raw file.
    pp_types = _species_pp_types(lines)

    seen_labels = set()
    for i, elem in enumerate(labels if labels else elements):
        elem_info = {"label": elem, "valid": True}

        if elem in seen_labels:
            result.add_error(
                f"ERROR: [ATOMIC_SPECIES] Duplicate element label\n"
                f" Label: {elem}\n"
                f" Fix: Each element label must be unique"
            )
            elem_info["valid"] = False
        seen_labels.add(elem)

        # Priority 2.1: empty element (BSSE calculations)
        if "empty" in elem.lower():
            result.add_suggestion(
                f"SUGGESTION: Element '{elem}' detected as empty atom\n"
                f" Purpose: For BSSE (Basis Set Superposition Error) calculations\n"
                f" Note: Empty atoms use ghost basis functions"
            )
            details["empty_elements"].append(elem)

        mass = _per_species_value(masses, elem, i) if masses else None
        if mass is not None:
            # float() rejects non-numeric values; the comparison rejects NaN,
            # zero and negative masses in one go.
            try:
                mass_is_positive = float(mass) > 0
            except (TypeError, ValueError):
                mass_is_positive = False
            if not mass_is_positive:
                result.add_error(
                    f"ERROR: [ATOMIC_SPECIES] Invalid mass\n"
                    f" Element: {elem}\n"
                    f" Mass: {mass}\n"
                    f" Expected: Positive float\n"
                    f" Fix: Set mass to a positive value"
                )
                elem_info["valid"] = False
            elem_info["mass"] = mass

        pp_file = _per_species_value(pp_files, elem, i) if pp_files else None
        if pp_file:
            elem_info["pp_file"] = pp_file
            if base_path:
                pp_path = base_path / pp_file
                if not pp_path.exists():
                    result.add_warning(
                        f"WARNING: [ATOMIC_SPECIES] Pseudopotential file not found\n"
                        f" Element: {elem}\n"
                        f" File: {pp_file}\n"
                        f" Suggestion: Check the file path or set ABACUS_PP_PATH"
                    )

        # Priority 1.2: pseudopotential type
        if elem in pp_types:
            pp_type = pp_types[elem]
            elem_info["pp_type"] = pp_type

            if pp_type not in VALID_PP_TYPES:
                result.add_error(
                    f"ERROR: [ATOMIC_SPECIES] Invalid pseudopotential type\n"
                    f" Element: {elem}\n"
                    f" Type: {pp_type}\n"
                    f" Valid types: {', '.join(VALID_PP_TYPES)}\n"
                    f" Fix: Use a valid PP type or omit for 'auto'"
                )
                elem_info["valid"] = False
            elif pp_type == "1/r":
                elem_info["coulomb_potential"] = True

        details["elements"].append(elem_info)

    result.details["atomic_species"] = details
def _validate_numerical_orbital(
    stru: AbacusStru,
    lines: List[str],
    result: ValidationResult,
    base_path: Optional[Path] = None
):
    """Validate the optional NUMERICAL_ORBITAL section.

    When the keyword is absent only ``{"present": False, ...}`` is recorded.
    Otherwise every orbital file known to ``stru`` is listed, checked for
    existence when ``base_path`` is given, and the number of files is
    compared with the number of elements.

    Args:
        stru: Parsed structure; read through ``_orb``/``orb_files`` and
            ``_element``/``elements``.
        lines: Raw lines of the STRU file.
        result: Collector for warnings and details.
        base_path: Directory used to resolve orbital file names, or ``None``
            to skip the existence check.
    """
    details = {"present": False, "orbital_files": []}
    section_present = "NUMERICAL_ORBITAL" in '\n'.join(lines)

    if section_present:
        details["present"] = True

        orb_files = getattr(stru, '_orb', None) or getattr(stru, 'orb_files', None)
        elements = getattr(stru, '_element', None) or getattr(stru, 'elements', None)

        if orb_files:
            for idx, orb_file in enumerate(orb_files):
                if elements and idx < len(elements):
                    owner = elements[idx]
                else:
                    owner = f"Element_{idx}"
                entry = {"element": owner, "file": orb_file, "exists": None}

                if base_path and orb_file:
                    found = (base_path / orb_file).exists()
                    entry["exists"] = found
                    if not found:
                        result.add_warning(
                            f"WARNING: [NUMERICAL_ORBITAL] Orbital file not found\n"
                            f" Element: {owner}\n"
                            f" File: {orb_file}\n"
                            f" Suggestion: Check the file path or set ABACUS_ORB_PATH"
                        )

                details["orbital_files"].append(entry)

            # One orbital file is expected per element.
            if elements and len(orb_files) != len(elements):
                result.add_warning(
                    f"WARNING: [NUMERICAL_ORBITAL] Orbital file count mismatch\n"
                    f" Elements: {len(elements)}\n"
                    f" Orbital files: {len(orb_files)}\n"
                    f" Suggestion: Provide orbital files for all elements"
                )

    result.details["numerical_orbital"] = details
def _validate_lattice_constant(stru: AbacusStru, lines: List[str], result: ValidationResult):
    """Validate LATTICE_CONSTANT section.

    The value is read from ``stru._lattice_constant`` and, only if that
    attribute is missing or ``None``, from ``stru.lat0``.  A stored value of
    0 is therefore reported as *invalid* rather than as *not defined*.
    Non-finite values (NaN, inf) are rejected as invalid as well.

    Args:
        stru: Parsed structure.
        lines: Raw lines of the STRU file (unused; kept for a uniform
            validator signature).
        result: Collector for errors, warnings and details.
    """
    details = {"value": None, "valid": True}

    # Explicit None checks: "a or b" would skip a legitimately stored 0.
    lat0 = getattr(stru, '_lattice_constant', None)
    if lat0 is None:
        lat0 = getattr(stru, 'lat0', None)

    if lat0 is None:
        result.add_error(
            f"ERROR: [LATTICE_CONSTANT] Lattice constant not defined\n"
            f" Fix: Add LATTICE_CONSTANT section with a positive value"
        )
        details["valid"] = False
        result.details["lattice_constant"] = details
        return

    details["value"] = lat0

    # NOTE(review): the warnings below label the value "Angstrom"; the ABACUS
    # STRU format documents LATTICE_CONSTANT in Bohr - confirm which unit the
    # parsed value carries before relying on these thresholds.
    if not np.isfinite(lat0) or lat0 <= 0:
        result.add_error(
            f"ERROR: [LATTICE_CONSTANT] Invalid lattice constant\n"
            f" Value: {lat0}\n"
            f" Expected: Positive float\n"
            f" Fix: Set lattice constant to a positive value"
        )
        details["valid"] = False
    elif lat0 < 0.1:
        result.add_warning(
            f"WARNING: [LATTICE_CONSTANT] Unusually small lattice constant\n"
            f" Value: {lat0} Angstrom\n"
            f" Suggestion: Verify this is the intended value"
        )
    elif lat0 > 100:
        result.add_warning(
            f"WARNING: [LATTICE_CONSTANT] Unusually large lattice constant\n"
            f" Value: {lat0} Angstrom\n"
            f" Suggestion: Verify this is the intended value"
        )

    result.details["lattice_constant"] = details
def _validate_lattice_vectors(stru: AbacusStru, lines: List[str], result: ValidationResult):
    """Validate LATTICE_VECTORS section.

    The vectors are read from ``stru._cell`` or, if that is missing or empty,
    from ``stru.cells``.  Lists and NumPy arrays are both accepted.  Ragged or
    non-numeric input is reported through the regular "invalid shape" error
    instead of raising.

    Recorded details: the vectors, the absolute determinant, whether the
    lattice is left-handed and, when a lattice constant is available, the
    cell volume ``|det| * lat0**3``.

    Args:
        stru: Parsed structure.
        lines: Raw lines of the STRU file (unused; kept for a uniform
            validator signature).
        result: Collector for errors, warnings and details.
    """
    details = {"vectors": None, "determinant": None, "volume": None, "valid": True, "left_handed": False}

    # Do not use "a or b" here: the truth value of a NumPy array is ambiguous.
    cells = None
    for attr in ('_cell', 'cells'):
        candidate = getattr(stru, attr, None)
        if candidate is not None and len(candidate) > 0:
            cells = candidate
            break

    if cells is None:
        result.add_error(
            f"ERROR: [LATTICE_VECTORS] Lattice vectors not defined\n"
            f" Fix: Add LATTICE_VECTORS section with 3 vectors"
        )
        details["valid"] = False
        result.details["lattice_vectors"] = details
        return

    try:
        cells = np.array(cells, dtype=float)
        shape = cells.shape
        details["vectors"] = cells.tolist()
    except (TypeError, ValueError):
        # Rows of different length or non-numeric entries.
        shape = "ragged or non-numeric"

    if shape != (3, 3):
        result.add_error(
            f"ERROR: [LATTICE_VECTORS] Invalid lattice vectors shape\n"
            f" Shape: {shape}\n"
            f" Expected: (3, 3)\n"
            f" Fix: Provide exactly 3 vectors with 3 components each"
        )
        details["valid"] = False
        result.details["lattice_vectors"] = details
        return

    det = np.linalg.det(cells)

    # Priority 1.4: Left-handed lattice detection
    if det < 0:
        result.add_warning(
            f"WARNING: [LATTICE_VECTORS] Left-handed lattice detected\n"
            f" Determinant: {det:.6e}\n"
            f" Note: Using absolute value for volume calculation\n"
            f" Suggestion: Consider using right-handed coordinate system"
        )
        details["left_handed"] = True
        det = abs(det)

    details["determinant"] = float(det)

    if abs(det) < 1e-10:
        result.add_error(
            f"ERROR: [LATTICE_VECTORS] Singular cell matrix\n"
            f" Determinant: {det}\n"
            f" Fix: Lattice vectors must be linearly independent"
        )
        details["valid"] = False

    # Volume needs the lattice constant as the length scale.
    # NOTE(review): the messages label the volume "Angstrom^3"; that is only
    # right if lat0 is in Angstrom - confirm the unit of the parsed value.
    lat0 = getattr(stru, '_lattice_constant', None) or getattr(stru, 'lat0', None)
    if lat0:
        volume = abs(det) * (lat0 ** 3)
        details["volume"] = float(volume)

        if volume < 1.0:
            result.add_warning(
                f"WARNING: [LATTICE_VECTORS] Unusually small cell volume\n"
                f" Volume: {volume:.2f} Angstrom^3\n"
                f" Suggestion: Verify lattice vectors and constant are correct"
            )
        elif volume > 100000:
            result.add_warning(
                f"WARNING: [LATTICE_VECTORS] Unusually large cell volume\n"
                f" Volume: {volume:.2f} Angstrom^3\n"
                f" Suggestion: Verify lattice vectors and constant are correct"
            )

    result.details["lattice_vectors"] = details
def _next_content_line(lines: List[str], pos: int) -> int:
    """Return the index of the first line at or after ``pos`` with non-comment text, else ``len(lines)``."""
    while pos < len(lines) and not strip_comments(lines[pos]).strip():
        pos += 1
    return pos


def _parse_attributes_from_lines(stru: AbacusStru, lines: List[str]) -> None:
    """
    Parse optional atom attributes from raw lines and add to stru object.

    This is needed because AbacusStru.ReadStru() doesn't extract optional attributes.

    For every element listed in ``stru._label`` (or ``stru.elements``) the
    ATOMIC_POSITIONS section is searched for the element's label line; the
    magnetism line and the atom-count line after it are skipped and then
    ``stru._atom_number`` coordinate lines are handed to
    ``_parse_atom_attributes``.  Blank and comment-only lines are skipped
    everywhere, so they neither shift the header lines nor use up an atom
    slot.

    Each per-element list only holds entries for atoms that actually carry
    the attribute, so list positions are not atom indices.

    Args:
        stru: Parsed STRU structure (from AbacusStru.ReadStru())
        lines: Raw file lines
    """
    # Initialize attribute storage on stru object
    stru.movement_constraints = {}
    stru.velocities = {}
    stru.mag_scalar = {}
    stru.mag_vector = {}
    stru.angles = {}
    stru.lambda_params = {}
    stru.spin_constraints = {}
    stru.has_old_style_movement = False

    content = '\n'.join(lines)
    if "ATOMIC_POSITIONS" not in content:
        return

    # (key in the parsed attribute dict, per-element store that receives it).
    # Scalar and vector variants of lambda / spin constraints share one store.
    stores = (
        ('movement', stru.movement_constraints),
        ('velocity', stru.velocities),
        ('mag_scalar', stru.mag_scalar),
        ('mag_vector', stru.mag_vector),
        ('lambda_scalar', stru.lambda_params),
        ('lambda_vector', stru.lambda_params),
        ('sc_scalar', stru.spin_constraints),
        ('sc_vector', stru.spin_constraints),
    )

    try:
        start_idx = next(i for i, line in enumerate(lines) if "ATOMIC_POSITIONS" in line)
        # Skip the section keyword and the coordinate-type line.
        i = _next_content_line(lines, start_idx + 1) + 1

        labels = getattr(stru, '_label', None) or getattr(stru, 'elements', None)
        atom_numbers = getattr(stru, '_atom_number', None)
        if not labels or not atom_numbers:
            return

        for elem, count in zip(labels, atom_numbers):
            # Advance to this element's label line.
            while i < len(lines):
                line = strip_comments(lines[i]).strip()
                if line and line.split()[0] == elem:
                    break
                i += 1
            if i >= len(lines):
                break

            # Skip the label, then the magnetism and atom-count lines.
            i += 1
            for _ in range(2):
                i = _next_content_line(lines, i) + 1

            for _ in range(count):
                i = _next_content_line(lines, i)
                if i >= len(lines):
                    break

                coord_line = strip_comments(lines[i]).strip()
                coord_parts = coord_line.split()
                i += 1
                if len(coord_parts) < 3:
                    continue

                attrs = _parse_atom_attributes(coord_line, coord_parts)

                for key, store in stores:
                    if attrs[key] is not None:
                        store.setdefault(elem, []).append(attrs[key])

                if attrs['movement'] is not None and attrs['old_style_movement']:
                    stru.has_old_style_movement = True

                if attrs['angle1'] is not None or attrs['angle2'] is not None:
                    stru.angles.setdefault(elem, []).append((attrs['angle1'], attrs['angle2']))

    except Exception:
        # Best effort: on any parsing failure keep what was collected so far.
        pass
def _read_numbers(tokens: List[str], start: int, count: int, cast) -> Optional[tuple]:
    """Convert ``count`` tokens beginning at ``start`` with ``cast``; ``None`` if too few tokens or any fails."""
    if start + count > len(tokens):
        return None
    try:
        return tuple(cast(tok) for tok in tokens[start:start + count])
    except ValueError:
        return None


def _read_scalar_or_vector(tokens: List[str], start: int) -> Any:
    """Read a float 3-tuple at ``start`` if possible, else a single float, else ``None``."""
    first = _read_numbers(tokens, start, 1, float)
    if first is None:
        return None
    triple = _read_numbers(tokens, start, 3, float)
    return triple if triple is not None else first[0]


def _parse_atom_attributes(line: str, coord_parts: List[str]) -> Dict[str, Any]:
    """
    Parse optional attributes after atomic coordinates.

    Implements parsing logic from read_atoms.cpp:206-316.

    Tokens 0-2 are the coordinates.  Three integer tokens that directly
    follow them and are all 0 or 1 are taken as an old-style movement
    constraint.  The remaining tokens are scanned for keywords (compared in
    lower case); a keyword whose values cannot be parsed, and any unknown
    token, is skipped one token at a time.

    Args:
        line: Full line with coordinates and attributes (currently unused)
        coord_parts: Already split line parts (first 3 are coordinates)

    Returns:
        Dictionary with parsed attributes:
        - movement: (mx, my, mz) tuple or None
        - old_style_movement: bool (True if old numeric format used)
        - velocity: (vx, vy, vz) tuple or None
        - mag_scalar: float or None
        - mag_vector: (mx, my, mz) tuple or None
        - angle1: float or None
        - angle2: float or None
        - lambda_scalar: float or None
        - lambda_vector: (x, y, z) tuple or None
        - sc_scalar: float or None
        - sc_vector: (x, y, z) tuple or None
    """
    attrs = {
        'movement': None,
        'old_style_movement': False,
        'velocity': None,
        'mag_scalar': None,
        'mag_vector': None,
        'angle1': None,
        'angle2': None,
        'lambda_scalar': None,
        'lambda_vector': None,
        'sc_scalar': None,
        'sc_vector': None
    }

    if len(coord_parts) <= 3:
        return attrs

    i = 3

    # Old-style movement constraints: three 0/1 integers right after the coordinates.
    legacy = _read_numbers(coord_parts, i, 3, int)
    if legacy is not None and all(m in (0, 1) for m in legacy):
        attrs['movement'] = legacy
        attrs['old_style_movement'] = True
        i += 3

    # Keywords whose value is either one float or three floats, with the
    # attribute-name prefix used for the "<prefix>_scalar"/"<prefix>_vector" keys.
    scalar_or_vector = (
        (VALID_MAG_KEYWORDS, 'mag'),
        (VALID_LAMBDA_KEYWORDS, 'lambda'),
        (VALID_SC_KEYWORDS, 'sc'),
    )

    while i < len(coord_parts):
        keyword = coord_parts[i].lower()
        consumed = 0  # tokens used by a successfully parsed keyword

        if keyword in VALID_MOVEMENT_KEYWORDS:
            # Movement: m 0 0 1
            values = _read_numbers(coord_parts, i + 1, 3, int)
            if values is not None:
                attrs['movement'] = values
                consumed = 4

        elif keyword in VALID_VELOCITY_KEYWORDS:
            # Velocity: v 1.0 2.0 3.0
            values = _read_numbers(coord_parts, i + 1, 3, float)
            if values is not None:
                attrs['velocity'] = values
                consumed = 4

        elif keyword in VALID_ANGLE_KEYWORDS:
            # Angles: angle1 45.0 or angle2 90.0
            values = _read_numbers(coord_parts, i + 1, 1, float)
            if values is not None:
                attrs['angle1' if keyword == "angle1" else 'angle2'] = values[0]
                consumed = 2

        else:
            for keywords, prefix in scalar_or_vector:
                if keyword not in keywords:
                    continue
                value = _read_scalar_or_vector(coord_parts, i + 1)
                if isinstance(value, tuple):
                    attrs[prefix + '_vector'] = value
                    consumed = 4
                elif value is not None:
                    attrs[prefix + '_scalar'] = value
                    consumed = 2
                break

        # Unknown token or unparsable keyword: move on by one token.
        i += consumed or 1

    return attrs
def _validate_atomic_positions(stru: AbacusStru, lines: List[str], result: ValidationResult):
    """Validate ATOMIC_POSITIONS section.

    The coordinate type is the first token of the first non-blank,
    non-comment line after the ``ATOMIC_POSITIONS`` keyword; blank lines in
    between no longer raise ``IndexError``.  When the raw lines provide no
    type, ``stru._cartesian`` decides between "Cartesian" and "Direct".

    Per element the declared atom count is checked (negative: error, zero:
    warning) and, for "Direct" coordinates, components far outside [0, 1]
    produce warnings.

    Args:
        stru: Parsed structure; read through ``_element``/``elements``,
            ``_label``, ``_atom_number``, ``_coord`` and ``_cartesian``.
        lines: Raw lines of the STRU file.
        result: Collector for errors, warnings and details.
    """
    details = {"coordinate_type": None, "elements": [], "valid": True}

    # Priority 1.1: Parse coordinate type from raw file (AbacusStru normalizes it)
    coord_type = None
    start_idx = next((i for i, line in enumerate(lines) if "ATOMIC_POSITIONS" in line), None)
    if start_idx is not None:
        for raw_line in lines[start_idx + 1:]:
            tokens = strip_comments(raw_line).split()
            if tokens:
                coord_type = tokens[0]
                break

    if coord_type is not None:
        details["coordinate_type"] = coord_type

        # Extended coordinate type support
        if coord_type not in VALID_COORD_TYPES:
            result.add_error(
                f"ERROR: [ATOMIC_POSITIONS] Invalid coordinate type\n"
                f" Type: {coord_type}\n"
                f" Expected: One of {', '.join(VALID_COORD_TYPES)}\n"
                f" Fix: Use a valid coordinate type"
            )
            details["valid"] = False
    else:
        # Fallback to AbacusStru if we couldn't parse from lines
        cartesian = getattr(stru, '_cartesian', None)
        if cartesian is not None:
            coord_type = "Cartesian" if cartesian else "Direct"
            details["coordinate_type"] = coord_type

    elements = getattr(stru, '_element', None) or getattr(stru, 'elements', None)
    labels = getattr(stru, '_label', None)
    atom_numbers = getattr(stru, '_atom_number', None)
    coords = getattr(stru, '_coord', None)

    # For validation, use labels from ATOMIC_SPECIES (which may differ from elements)
    species_labels = labels if labels else elements

    if not elements:
        result.details["atomic_positions"] = details
        return

    # len() instead of truthiness so that array-like coordinates work too.
    has_coords = coords is not None and len(coords) > 0

    if has_coords and labels and atom_numbers:
        coord_idx = 0
        for label, count in zip(labels, atom_numbers):
            elem_info = {
                "label": label,
                "declared_count": count,
                "actual_count": count,
                "valid": True
            }

            # Priority 1.3: Atom number validation
            if count < 0:
                result.add_error(
                    f"ERROR: [ATOMIC_POSITIONS] Negative atom count\n"
                    f" Element: {label}\n"
                    f" Count: {count}\n"
                    f" Fix: Atom count must be non-negative"
                )
                elem_info["valid"] = False
            elif count == 0:
                result.add_warning(
                    f"WARNING: [ATOMIC_POSITIONS] Zero atoms for element\n"
                    f" Element: {label}\n"
                    f" Note: If this is intentional (e.g., for DP model), ignore this warning\n"
                    f" Suggestion: Verify this is not a mistake"
                )

            # NOTE(review): inside this loop species_labels is the same list as
            # labels, so this check cannot fail; it probably needs the labels
            # declared in ATOMIC_SPECIES - confirm the intended source.
            if species_labels and label not in species_labels:
                result.add_error(
                    f"ERROR: [ATOMIC_POSITIONS] Unknown element\n"
                    f" Element: {label}\n"
                    f" Fix: Add {label} to ATOMIC_SPECIES section"
                )
                elem_info["valid"] = False

            if count > 0:
                elem_coords = coords[coord_idx:coord_idx + count]
                coord_idx += count

                # Priority 1.6: Direct coordinate wrapping validation
                if details["coordinate_type"] == "Direct":
                    for j, coord in enumerate(elem_coords):
                        for k, val in enumerate(coord[:3]):
                            if val < -0.5 or val > 1.5:
                                # Match C++ wrapping: fmod(x + 10000, 1.0)
                                wrapped = (val + 10000) % 1.0
                                result.add_warning(
                                    f"WARNING: [ATOMIC_POSITIONS] Direct coordinate outside [0,1]\n"
                                    f" Element: {label}, Atom: {j+1}, Component: {['x','y','z'][k]}\n"
                                    f" Value: {val}\n"
                                    f" Wrapped value: {wrapped:.6f}\n"
                                    f" Note: Coordinates will be wrapped for periodic boundaries\n"
                                    f" Suggestion: Consider using coordinates in [0,1] range"
                                )
                            elif val < -0.1 or val > 1.1:
                                # Moderate deviation: shorter warning
                                result.add_warning(
                                    f"WARNING: [ATOMIC_POSITIONS] Direct coordinate outside [0,1]\n"
                                    f" Element: {label}, Atom: {j+1}, Component: {['x','y','z'][k]}\n"
                                    f" Value: {val}\n"
                                    f" Suggestion: Direct coordinates are typically in [0,1]"
                                )

            details["elements"].append(elem_info)

    result.details["atomic_positions"] = details
def _validate_atom_attributes(
    stru: AbacusStru,
    lines: List[str],
    result: ValidationResult
) -> None:
    """
    Validate optional atom attributes in ATOMIC_POSITIONS section.

    Implements validation logic from read_atoms.cpp:206-316.

    Works on the per-element attribute stores attached to ``stru``
    (``movement_constraints``, ``velocities``, ``mag_scalar``, ``mag_vector``,
    ``angles``, ``lambda_params``, ``spin_constraints``).  Counts are gathered
    for each kind; invalid movement flags and vector-moment/angle conflicts
    are reported as errors, deprecated movement syntax and out-of-range
    angles as warnings.  The summary is stored under
    ``result.details["atomic_positions"]["attributes"]`` when that parent
    entry exists.

    Args:
        stru: Parsed STRU structure (AbacusStru or ManualStru)
        lines: Raw file lines
        result: ValidationResult object to accumulate errors/warnings
    """
    movement_stats = {"count": 0, "old_format_count": 0, "new_format_count": 0}
    velocity_stats = {"count": 0}
    magnetic_stats = {"scalar_count": 0, "vector_count": 0, "angle_count": 0, "conflicts": []}
    lambda_stats = {"scalar_count": 0, "vector_count": 0}
    spin_stats = {"scalar_count": 0, "vector_count": 0}
    issues = []

    details = {
        "total_atoms_with_attributes": 0,
        "movement_constraints": movement_stats,
        "velocities": velocity_stats,
        "magnetic_moments": magnetic_stats,
        "lambda_parameters": lambda_stats,
        "spin_constraints": spin_stats,
        "issues": issues
    }

    store_names = (
        'movement_constraints', 'velocities', 'mag_scalar', 'mag_vector',
        'angles', 'lambda_params', 'spin_constraints'
    )
    if not any(hasattr(stru, name) for name in store_names):
        # Nothing was parsed: publish the all-zero summary and stop.
        if "atomic_positions" in result.details:
            result.details["atomic_positions"]["attributes"] = details
        return

    # --- movement constraints -------------------------------------------
    if hasattr(stru, 'movement_constraints'):
        for elem, movements in stru.movement_constraints.items():
            for atom_no, (mx, my, mz) in enumerate(movements, start=1):
                movement_stats["count"] += 1

                if mx in [0, 1] and my in [0, 1] and mz in [0, 1]:
                    continue

                issues.append(f"Element: {elem}, Atom: {atom_no} - Invalid movement values: {mx} {my} {mz}")
                result.add_error(
                    f"ERROR: [ATOMIC_POSITIONS] Invalid movement constraint values\n"
                    f" Element: {elem}, Atom: {atom_no}\n"
                    f" Values: {mx} {my} {mz}\n"
                    f" Expected: Each value must be 0 (frozen) or 1 (movable)\n"
                    f" Fix: Use 0 to freeze or 1 to allow movement in each direction"
                )

        # The parser only records one file-wide flag, so the whole count is
        # attributed to a single format.
        if getattr(stru, 'has_old_style_movement', False):
            movement_stats["old_format_count"] = movement_stats["count"]
            result.add_warning(
                f"WARNING: [ATOMIC_POSITIONS] Deprecated movement constraint format\n"
                f" Format: Numeric values after coordinates (e.g., '0 0 1')\n"
                f" Suggestion: Use new keyword format: 'm 0 0 1'\n"
                f" Note: Old format still works but may be removed in future versions"
            )
        else:
            movement_stats["new_format_count"] = movement_stats["count"]

    # --- velocities (values were already checked while parsing) ----------
    if hasattr(stru, 'velocities'):
        for per_element in stru.velocities.values():
            for _vx, _vy, _vz in per_element:
                velocity_stats["count"] += 1

    # --- magnetic moments -----------------------------------------------
    if hasattr(stru, 'mag_scalar'):
        for mags in stru.mag_scalar.values():
            magnetic_stats["scalar_count"] += len(mags)

    if hasattr(stru, 'mag_vector'):
        for mags in stru.mag_vector.values():
            magnetic_stats["vector_count"] += len(mags)

    if hasattr(stru, 'angles'):
        for elem, angles in stru.angles.items():
            for idx, (angle1, angle2) in enumerate(angles):
                magnetic_stats["angle_count"] += 1

                for name, value in (("angle1", angle1), ("angle2", angle2)):
                    if value is None:
                        continue
                    if value < REASONABLE_ANGLE_MIN or value > REASONABLE_ANGLE_MAX:
                        result.add_warning(
                            f"WARNING: [ATOMIC_POSITIONS] Angle outside reasonable range\n"
                            f" Element: {elem}, Atom: {idx+1}\n"
                            f" {name}: {value} degrees\n"
                            f" Reasonable range: [{REASONABLE_ANGLE_MIN}, {REASONABLE_ANGLE_MAX}]\n"
                            f" Suggestion: Verify this is the intended value"
                        )

                # Conflict: a vector moment and angles must not both be given.
                # The test is by list position within the element's stores.
                has_vector_at_idx = (
                    hasattr(stru, 'mag_vector')
                    and elem in stru.mag_vector
                    and idx < len(stru.mag_vector[elem])
                )
                if has_vector_at_idx:
                    magnetic_stats["conflicts"].append(f"{elem}:{idx+1}")
                    result.add_error(
                        f"ERROR: [ATOMIC_POSITIONS] Conflicting magnetic moment specifications\n"
                        f" Element: {elem}, Atom: {idx+1}\n"
                        f" Found: Vector magnetic moment AND angles\n"
                        f" Fix: Use either vector magnetic moment (mag x y z) OR angles (angle1/angle2), not both"
                    )

    # --- lambda parameters / spin constraints: tuples are vector entries --
    if hasattr(stru, 'lambda_params'):
        for lambdas in stru.lambda_params.values():
            for entry in lambdas:
                kind = "vector_count" if isinstance(entry, tuple) else "scalar_count"
                lambda_stats[kind] += 1

    if hasattr(stru, 'spin_constraints'):
        for constraints in stru.spin_constraints.values():
            for entry in constraints:
                kind = "vector_count" if isinstance(entry, tuple) else "scalar_count"
                spin_stats[kind] += 1

    # Largest per-kind count is used as the number of atoms with attributes.
    details["total_atoms_with_attributes"] = max(
        movement_stats["count"],
        velocity_stats["count"],
        magnetic_stats["scalar_count"] + magnetic_stats["vector_count"],
        lambda_stats["scalar_count"] + lambda_stats["vector_count"],
        spin_stats["scalar_count"] + spin_stats["vector_count"]
    )

    if "atomic_positions" in result.details:
        result.details["atomic_positions"]["attributes"] = details
def _validate_consistency(stru: AbacusStru, result: ValidationResult):
    """Validate consistency across sections.

    Two checks are performed:

    * every label used in ATOMIC_POSITIONS must be a known species label;
    * at least one atom must be defined.

    Both structure flavours are supported: a flat coordinate sequence in
    ``stru._coord`` together with ``stru._label``, or a dict in
    ``stru.coords`` that maps element labels to coordinate lists.
    Coordinates may be lists, tuples or NumPy arrays.

    Args:
        stru: Parsed structure (AbacusStru or manual parser result).
        result: Collector for errors and details.
    """
    details = {"checks": []}

    def _first_nonempty(*names):
        """Return the first attribute that is set and non-empty (array-safe)."""
        for name in names:
            value = getattr(stru, name, None)
            if value is None:
                continue
            try:
                if len(value) == 0:
                    continue
            except TypeError:
                pass
            return value
        return None

    elements = _first_nonempty('_element', 'elements')
    labels = _first_nonempty('_label')
    coords = _first_nonempty('_coord', 'coords')

    # Decide which labels to check and which set they must belong to.
    if labels is not None:
        # NOTE(review): the reference set is the label list itself, so this
        # branch can never report a mismatch; it presumably should use the
        # labels declared in ATOMIC_SPECIES - confirm where those are stored.
        used_labels, known_labels = labels, labels
    elif isinstance(coords, dict) and elements is not None:
        # Manual parser format - coords is a dict with element keys
        used_labels, known_labels = list(coords), elements
    else:
        used_labels, known_labels = [], []

    for label in used_labels:
        passed = label in known_labels
        if not passed:
            result.add_error(
                f"ERROR: [Consistency] Element in ATOMIC_POSITIONS not in ATOMIC_SPECIES\n"
                f" Element: {label}\n"
                f" Fix: Add {label} to ATOMIC_SPECIES section"
            )
        details["checks"].append({
            "check": "element_consistency",
            "passed": passed,
            "element": label
        })

    # Total atom count: sum over elements for a dict, length otherwise.
    total_atoms = 0
    if isinstance(coords, dict):
        total_atoms = sum(len(c) for c in coords.values())
    elif coords is not None:
        try:
            total_atoms = len(coords)
        except TypeError:
            total_atoms = 0
    details["total_atoms"] = total_atoms

    if total_atoms == 0:
        result.add_error(
            f"ERROR: [Consistency] No atoms defined\n"
            f" Fix: Add atomic positions to ATOMIC_POSITIONS section"
        )

    result.details["consistency"] = details
Exception as e: + details["checks"].append({ + "check": "atom_distances", + "passed": False, + "error": str(e) + }) + + # Check magnetic moments if present + magmoms = getattr(stru, '_magmom', None) + if magmoms: + for i, mag in enumerate(magmoms): + if mag is not None and abs(mag) > 10: + result.add_warning( + f"WARNING: [Physical] Unusually large magnetic moment\n" + f" Element index: {i+1}\n" + f" Magnetic moment: {mag}\n" + f" Suggestion: Verify this is the intended value" + ) + + result.details["physical_validity"] = details diff --git a/tests/test_stru_validator.py b/tests/test_stru_validator.py new file mode 100644 index 0000000..3568120 --- /dev/null +++ b/tests/test_stru_validator.py @@ -0,0 +1,1859 @@ +""" +Unit tests for STRU file validation tool. +""" + +import os +import pytest +from pathlib import Path +from abacusagent.modules.submodules.stru_validator import validate_stru + + +# Set test mode to avoid MCP server initialization +os.environ["ABACUSAGENT_MODEL"] = "test" + + +@pytest.fixture +def test_data_dir(): + """Get the test data directory.""" + return Path(__file__).parent / "abacus" + + +@pytest.fixture +def valid_stru_gaas(test_data_dir): + """Path to valid GaAs STRU file.""" + return test_data_dir / "STRU" + + +@pytest.fixture +def valid_stru_nio(test_data_dir): + """Path to valid NiO STRU file with constraints.""" + return test_data_dir / "STRU_NiO_fixatom" + + +@pytest.fixture +def temp_stru(tmp_path): + """Create a temporary STRU file for testing.""" + def _create_stru(content: str) -> Path: + stru_file = tmp_path / "STRU" + stru_file.write_text(content) + return stru_file + return _create_stru + + +class TestValidStruFiles: + """Test validation of valid STRU files.""" + + def test_valid_gaas_stru(self, valid_stru_gaas): + """Test that valid GaAs STRU file passes validation.""" + if not valid_stru_gaas.exists(): + pytest.skip("GaAs STRU test file not found") + + result = validate_stru(str(valid_stru_gaas), check_file_existence=False) + + 
class TestFileErrors:
    """Checks for problems detected at the file level."""

    def test_nonexistent_file(self):
        """A path that does not exist yields a "not found" error first."""
        report = validate_stru("/nonexistent/path/STRU")
        errors = report["errors"]

        assert report["valid"] is False
        assert len(errors) > 0
        assert "not found" in errors[0].lower()

    def test_missing_required_section(self, temp_stru):
        """Absent LATTICE_VECTORS and ATOMIC_POSITIONS are both reported."""
        stru_text = (
            "ATOMIC_SPECIES\n"
            "Ga 69.723 Ga_ONCV_PBE-1.0.upf\n"
            "As 74.922 As_ONCV_PBE-1.0.upf\n"
            "\n"
            "LATTICE_CONSTANT\n"
            "5.65\n"
        )
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)
        errors = report["errors"]

        assert report["valid"] is False
        for section in ("LATTICE_VECTORS", "ATOMIC_POSITIONS"):
            assert any(section in message for message in errors)
Ga_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +5.65 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +0 0 0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is False + assert any("duplicate" in error.lower() for error in result["errors"]) + + +class TestLatticeConstant: + """Test LATTICE_CONSTANT section validation.""" + + def test_negative_lattice_constant(self, temp_stru): + """Test detection of negative lattice constant.""" + content = """ATOMIC_SPECIES +Ga 69.723 Ga_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +-5.65 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +1 +0 0 0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is False + assert any("lattice constant" in error.lower() and "positive" in error.lower() + for error in result["errors"]) + + def test_unusually_small_lattice_constant(self, temp_stru): + """Test warning for unusually small lattice constant.""" + content = """ATOMIC_SPECIES +Ga 69.723 Ga_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +0.05 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +1 +0 0 0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True # Should pass but with warning + assert len(result["warnings"]) > 0 + assert any("small" in warning.lower() for warning in result["warnings"]) + + def test_unusually_large_lattice_constant(self, temp_stru): + """Test warning for unusually large lattice constant.""" + content = """ATOMIC_SPECIES +Ga 69.723 Ga_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +150.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +1 +0 0 0 +""" + stru_file = temp_stru(content) + result = 
class TestLatticeVectors:
    """Checks for the LATTICE_VECTORS section."""

    def test_singular_cell_matrix(self, temp_stru):
        """Two collinear lattice vectors are reported as a singular cell."""
        stru_text = (
            "ATOMIC_SPECIES\n"
            "Ga 69.723 Ga_ONCV_PBE-1.0.upf\n"
            "\n"
            "LATTICE_CONSTANT\n"
            "5.65\n"
            "\n"
            "LATTICE_VECTORS\n"
            "1.0 0.0 0.0\n"
            "2.0 0.0 0.0\n"
            "0.0 0.0 1.0\n"
            "\n"
            "ATOMIC_POSITIONS\n"
            "Direct\n"
            "\n"
            "Ga\n"
            "0.0\n"
            "1\n"
            "0 0 0\n"
        )
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is False
        assert any("singular" in message.lower() for message in report["errors"])
class TestConsistency:
    """Checks that span more than one STRU section."""

    # Sections shared by both cases; each test appends its own
    # ATOMIC_POSITIONS payload after the "Direct" keyword line.
    _COMMON_SECTIONS = (
        "ATOMIC_SPECIES\n"
        "Ga 69.723 Ga_ONCV_PBE-1.0.upf\n"
        "\n"
        "LATTICE_CONSTANT\n"
        "5.65\n"
        "\n"
        "LATTICE_VECTORS\n"
        "1.0 0.0 0.0\n"
        "0.0 1.0 0.0\n"
        "0.0 0.0 1.0\n"
        "\n"
        "ATOMIC_POSITIONS\n"
        "Direct\n"
    )

    def test_element_not_in_species(self, temp_stru):
        """An element positioned but never declared in ATOMIC_SPECIES is an error."""
        stru_text = self._COMMON_SECTIONS + "\nAs\n0.0\n1\n0.25 0.25 0.25\n"
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is False
        assert any(
            "not in atomic_species" in message.lower()
            for message in report["errors"]
        )

    def test_no_atoms_defined(self, temp_stru):
        """An ATOMIC_POSITIONS section without any atom is an error."""
        stru_text = self._COMMON_SECTIONS
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is False
        assert any("no atoms" in message.lower() for message in report["errors"])
class TestExtendedCoordinateTypes:
    """Test extended coordinate type support (Priority 1.1)."""

    # Minimal one-atom STRU file; only the coordinate-type keyword and the
    # position line differ between the cases below.
    _STRU_TEMPLATE = (
        "ATOMIC_SPECIES\n"
        "Ga 69.723 Ga_ONCV_PBE-1.0.upf\n"
        "\n"
        "LATTICE_CONSTANT\n"
        "5.65\n"
        "\n"
        "LATTICE_VECTORS\n"
        "1.0 0.0 0.0\n"
        "0.0 1.0 0.0\n"
        "0.0 0.0 1.0\n"
        "\n"
        "ATOMIC_POSITIONS\n"
        "{coord_type}\n"
        "\n"
        "Ga\n"
        "0.0\n"
        "1\n"
        "{position}\n"
    )

    @pytest.mark.parametrize(
        "coord_type",
        [
            "Cartesian_angstrom",
            "Cartesian_au",
            "Cartesian_angstrom_center_xy",
            "Cartesian_angstrom_center_xz",
            "Cartesian_angstrom_center_yz",
            "Cartesian_angstrom_center_xyz",
        ],
    )
    def test_extended_coordinate_type_accepted(self, temp_stru, coord_type):
        """Each extended coordinate type validates and is echoed in the details."""
        content = self._STRU_TEMPLATE.format(
            coord_type=coord_type, position="0.0 0.0 0.0"
        )
        stru_file = temp_stru(content)
        result = validate_stru(str(stru_file), check_file_existence=False)

        assert result["valid"] is True
        assert result["details"]["atomic_positions"]["coordinate_type"] == coord_type

    def test_invalid_coordinate_type_extended(self, temp_stru):
        """An unknown coordinate type is rejected and named in the error."""
        content = self._STRU_TEMPLATE.format(
            coord_type="InvalidType", position="0 0 0"
        )
        stru_file = temp_stru(content)
        result = validate_stru(str(stru_file), check_file_existence=False)

        assert result["valid"] is False
        assert any("invalid coordinate type" in e.lower() for e in result["errors"])
        assert any("InvalidType" in e for e in result["errors"])
temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert result["details"]["atomic_species"]["elements"][0]["pp_type"] == "upf" + + def test_valid_pp_type_vwr(self, temp_stru): + """Test valid PP type 'vwr'.""" + content = """ATOMIC_SPECIES +Ga 69.723 Ga.vwr vwr + +LATTICE_CONSTANT +5.65 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +1 +0 0 0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert result["details"]["atomic_species"]["elements"][0]["pp_type"] == "vwr" + + def test_valid_pp_type_upf201(self, temp_stru): + """Test valid PP type 'upf201'.""" + content = """ATOMIC_SPECIES +Ga 69.723 Ga.upf upf201 + +LATTICE_CONSTANT +5.65 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +1 +0 0 0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert result["details"]["atomic_species"]["elements"][0]["pp_type"] == "upf201" + + def test_valid_pp_type_blps(self, temp_stru): + """Test valid PP type 'blps'.""" + content = """ATOMIC_SPECIES +Ga 69.723 Ga.blps blps + +LATTICE_CONSTANT +5.65 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +1 +0 0 0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert result["details"]["atomic_species"]["elements"][0]["pp_type"] == "blps" + + def test_valid_pp_type_coulomb(self, temp_stru): + """Test valid PP type '1/r' (Coulomb potential).""" + content = """ATOMIC_SPECIES +Ga 69.723 Ga.upf 1/r + +LATTICE_CONSTANT +5.65 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Direct + +Ga +0.0 +1 +0 0 0 +""" + stru_file 
class TestAtomNumberValidation:
    """Checks on the declared atom count of an element (Priority 1.3)."""

    # Everything up to and including the element label and its magnetism
    # line; each test appends the atom-count line and the positions.
    _PREFIX = (
        "ATOMIC_SPECIES\n"
        "Ga 69.723 Ga_ONCV_PBE-1.0.upf\n"
        "\n"
        "LATTICE_CONSTANT\n"
        "5.65\n"
        "\n"
        "LATTICE_VECTORS\n"
        "1.0 0.0 0.0\n"
        "0.0 1.0 0.0\n"
        "0.0 0.0 1.0\n"
        "\n"
        "ATOMIC_POSITIONS\n"
        "Direct\n"
        "\n"
        "Ga\n"
        "0.0\n"
    )

    def test_negative_atom_count(self, temp_stru):
        """A negative atom count makes the file invalid with at least one error."""
        stru_text = self._PREFIX + "-1\n0 0 0\n"
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        # Only invalidity is pinned here; the exact error wording is not.
        assert report["valid"] is False
        assert len(report["errors"]) > 0

    def test_zero_atom_count(self, temp_stru):
        """A zero atom count is reported as a "no atoms" error."""
        stru_text = self._PREFIX + "0\n"
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is False
        assert any("no atoms" in message.lower() for message in report["errors"])

    def test_positive_atom_count(self, temp_stru):
        """A positive atom count is valid and recorded as the declared count."""
        stru_text = self._PREFIX + "2\n0.0 0.0 0.0\n0.5 0.5 0.5\n"
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is True
        first_element = report["details"]["atomic_positions"]["elements"][0]
        assert first_element["declared_count"] == 2
class TestDirectCoordinateWrapping:
    """Test direct coordinate wrapping warnings (Priority 1.6)."""

    # One-atom STRU file in Direct coordinates; the single position line is
    # appended by each test.
    _PREFIX = (
        "ATOMIC_SPECIES\n"
        "Ga 69.723 Ga_ONCV_PBE-1.0.upf\n"
        "\n"
        "LATTICE_CONSTANT\n"
        "5.65\n"
        "\n"
        "LATTICE_VECTORS\n"
        "1.0 0.0 0.0\n"
        "0.0 1.0 0.0\n"
        "0.0 0.0 1.0\n"
        "\n"
        "ATOMIC_POSITIONS\n"
        "Direct\n"
        "\n"
        "Ga\n"
        "0.0\n"
        "1\n"
    )

    def test_coordinate_far_outside_range(self, temp_stru):
        """Test warning for coordinates far outside [0,1]."""
        stru_file = temp_stru(self._PREFIX + "2.5 0.5 0.5\n")
        result = validate_stru(str(stru_file), check_file_existence=False)

        assert result["valid"] is True  # Should pass but with warning
        assert any("outside [0,1]" in w.lower() for w in result["warnings"])
        # Check that warning mentions wrapped value
        warning_text = ' '.join(result["warnings"])
        assert "wrapped" in warning_text.lower()

    def test_negative_coordinate_wrapping(self, temp_stru):
        """Test warning for negative coordinates."""
        stru_file = temp_stru(self._PREFIX + "-0.8 0.5 0.5\n")
        result = validate_stru(str(stru_file), check_file_existence=False)

        assert result["valid"] is True  # Should pass but with warning
        assert any("outside [0,1]" in w.lower() for w in result["warnings"])

    def test_coordinates_in_range(self, temp_stru):
        """Test coordinates in [0,1] range (no warning)."""
        stru_file = temp_stru(self._PREFIX + "0.25 0.5 0.75\n")
        result = validate_stru(str(stru_file), check_file_existence=False)

        assert result["valid"] is True
        # Other warnings may be present, but none about the coordinate range.
        assert not any("outside [0,1]" in w.lower() for w in result["warnings"])
class TestEmptyElementDetection:
    """Detection of "empty" (ghost) elements used for BSSE (Priority 2.1)."""

    @staticmethod
    def _two_species_stru(ghost_label):
        """Return a STRU text with *ghost_label* and a regular H species."""
        return (
            "ATOMIC_SPECIES\n"
            f"{ghost_label} 1.008 H_ONCV_PBE-1.0.upf\n"
            "H 1.008 H_ONCV_PBE-1.0.upf\n"
            "\n"
            "LATTICE_CONSTANT\n"
            "10.0\n"
            "\n"
            "LATTICE_VECTORS\n"
            "1.0 0.0 0.0\n"
            "0.0 1.0 0.0\n"
            "0.0 0.0 1.0\n"
            "\n"
            "ATOMIC_POSITIONS\n"
            "Direct\n"
            "\n"
            f"{ghost_label}\n"
            "0.0\n"
            "1\n"
            "0.5 0.5 0.5\n"
            "\n"
            "H\n"
            "0.0\n"
            "1\n"
            "0.6 0.6 0.6\n"
        )

    def test_empty_element_lowercase(self, temp_stru):
        """A label containing lowercase 'empty' is flagged as an empty element."""
        stru_text = self._two_species_stru("H_empty")
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is True
        assert any("empty atom" in hint.lower() for hint in report["suggestions"])
        assert "H_empty" in report["details"]["atomic_species"]["empty_elements"]

    def test_empty_element_uppercase(self, temp_stru):
        """A label containing uppercase 'EMPTY' is flagged as an empty element."""
        stru_text = self._two_species_stru("EMPTY_H")
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is True
        assert any("empty atom" in hint.lower() for hint in report["suggestions"])
        assert "EMPTY_H" in report["details"]["atomic_species"]["empty_elements"]

    def test_normal_element(self, temp_stru):
        """A regular element produces no empty-atom suggestion or entry."""
        stru_text = (
            "ATOMIC_SPECIES\n"
            "Ga 69.723 Ga_ONCV_PBE-1.0.upf\n"
            "\n"
            "LATTICE_CONSTANT\n"
            "5.65\n"
            "\n"
            "LATTICE_VECTORS\n"
            "1.0 0.0 0.0\n"
            "0.0 1.0 0.0\n"
            "0.0 0.0 1.0\n"
            "\n"
            "ATOMIC_POSITIONS\n"
            "Direct\n"
            "\n"
            "Ga\n"
            "0.0\n"
            "1\n"
            "0 0 0\n"
        )
        report = validate_stru(str(temp_stru(stru_text)), check_file_existence=False)

        assert report["valid"] is True
        assert not any("empty atom" in hint.lower() for hint in report["suggestions"])
        assert len(report["details"]["atomic_species"]["empty_elements"]) == 0
= """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +2 +0.0 0.0 0.0 0 0 1 +5.0 5.0 5.0 1 1 1 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["movement_constraints"]["count"] == 2 + assert attrs["movement_constraints"]["old_format_count"] == 2 + # Should have deprecation warning + assert any("deprecated" in w.lower() for w in result["warnings"]) + + def test_velocity(self, temp_stru): + """Test velocity attributes (v 1.0 2.0 3.0).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 v 1.0 2.0 3.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["velocities"]["count"] == 1 + + def test_mag_scalar(self, temp_stru): + """Test scalar magnetic moment (mag 2.0).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 mag 2.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["magnetic_moments"]["scalar_count"] == 1 + assert attrs["magnetic_moments"]["vector_count"] == 0 + + def 
test_mag_vector(self, temp_stru): + """Test vector magnetic moment (mag 1.0 2.0 3.0).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 mag 1.0 2.0 3.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["magnetic_moments"]["scalar_count"] == 0 + assert attrs["magnetic_moments"]["vector_count"] == 1 + + def test_angles(self, temp_stru): + """Test angle attributes (angle1 45.0 angle2 90.0).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 angle1 45.0 angle2 90.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["magnetic_moments"]["angle_count"] == 1 + + def test_lambda_scalar(self, temp_stru): + """Test scalar lambda (lambda 0.5).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 lambda 0.5 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["lambda_parameters"]["scalar_count"] == 1 + assert attrs["lambda_parameters"]["vector_count"] == 0 
+ + def test_lambda_vector(self, temp_stru): + """Test vector lambda (lambda 0.1 0.2 0.3).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 lambda 0.1 0.2 0.3 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["lambda_parameters"]["scalar_count"] == 0 + assert attrs["lambda_parameters"]["vector_count"] == 1 + + def test_sc_scalar(self, temp_stru): + """Test scalar spin constraint (sc 1.0).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 sc 1.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["spin_constraints"]["scalar_count"] == 1 + assert attrs["spin_constraints"]["vector_count"] == 0 + + def test_sc_vector(self, temp_stru): + """Test vector spin constraint (sc 0.1 0.2 0.3).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 sc 0.1 0.2 0.3 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["spin_constraints"]["scalar_count"] == 0 + 
assert attrs["spin_constraints"]["vector_count"] == 1 + + def test_mixed_attributes(self, temp_stru): + """Test multiple attributes on same line.""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 m 1 1 0 v 0.1 0.2 0.3 mag 1.5 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["movement_constraints"]["count"] == 1 + assert attrs["velocities"]["count"] == 1 + assert attrs["magnetic_moments"]["scalar_count"] == 1 + + def test_attributes_with_comments(self, temp_stru): + """Test attributes with trailing comments.""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 m 1 1 0 mag 2.0 # frozen in xy, mag moment 2.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert "attributes" in result["details"]["atomic_positions"] + attrs = result["details"]["atomic_positions"]["attributes"] + assert attrs["movement_constraints"]["count"] == 1 + assert attrs["magnetic_moments"]["scalar_count"] == 1 + + +class TestAtomAttributeValidation: + """Test validation of attribute values.""" + + def test_invalid_movement_values(self, temp_stru): + """Test invalid movement constraint values (not 0 or 1).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 m 1 2 0 +""" + stru_file = temp_stru(content) + result = 
validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is False + assert any("invalid movement" in e.lower() for e in result["errors"]) + + def test_angle_outside_range(self, temp_stru): + """Test angle outside reasonable range (warning).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 angle1 500.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True # Warning, not error + assert any("angle" in w.lower() and "range" in w.lower() for w in result["warnings"]) + + def test_conflicting_mag_specifications(self, temp_stru): + """Test conflicting magnetic specifications (vector mag + angles).""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 mag 1.0 2.0 3.0 angle1 45.0 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is False + assert any("conflict" in e.lower() for e in result["errors"]) + attrs = result["details"]["atomic_positions"]["attributes"] + assert len(attrs["magnetic_moments"]["conflicts"]) > 0 + + +class TestDeprecatedFormats: + """Test handling of deprecated formats.""" + + def test_old_style_movement_warning(self, temp_stru): + """Test warning for old-style movement format.""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 0 0 1 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert any("deprecated" in w.lower() 
for w in result["warnings"]) + assert any("m 0 0 1" in w for w in result["warnings"]) + + def test_new_style_no_warning(self, temp_stru): + """Test no warning for new-style movement format.""" + content = """ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf + +LATTICE_CONSTANT +10.0 + +LATTICE_VECTORS +1.0 0.0 0.0 +0.0 1.0 0.0 +0.0 0.0 1.0 + +ATOMIC_POSITIONS +Cartesian + +H +0.0 +1 +0.0 0.0 0.0 m 0 0 1 +""" + stru_file = temp_stru(content) + result = validate_stru(str(stru_file), check_file_existence=False) + + assert result["valid"] is True + assert not any("deprecated" in w.lower() for w in result["warnings"]) + + +class TestRealStruFiles: + """Test with existing STRU files.""" + + def test_nio_fixatom_attributes(self, valid_stru_nio): + """Test STRU_NiO_fixatom has attributes correctly parsed.""" + if not valid_stru_nio.exists(): + pytest.skip("NiO STRU test file not found") + + result = validate_stru(str(valid_stru_nio), check_file_existence=False) + + assert result["valid"] is True + # File should have old-style movement and mag attributes + if "attributes" in result["details"]["atomic_positions"]: + attrs = result["details"]["atomic_positions"]["attributes"] + # Should have movement constraints + assert attrs["movement_constraints"]["count"] > 0 + # Should have magnetic moments + assert (attrs["magnetic_moments"]["scalar_count"] + + attrs["magnetic_moments"]["vector_count"]) > 0 + # Should have deprecation warning for old-style format + assert any("deprecated" in w.lower() for w in result["warnings"])