Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions gbasis/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,125 @@ def parse_gbs(gbs_basis_file):
return output


def _bse_coeffs_to_array(coeffs_raw, n_exponents, atom_symbol, angmom):
    """Convert BSE coefficient data into an (n_exponents, n_contractions) array.

    Parameters
    ----------
    coeffs_raw : scalar, list, or list of lists
        Coefficient data for one angular momentum. BSE stores one row per
        contraction, i.e. the layout is (n_contractions, n_exponents).
    n_exponents : int
        Number of primitive exponents in the shell.
    atom_symbol : str
        Element symbol, used only in error messages.
    angmom : int
        Angular momentum, used only in error messages.

    Returns
    -------
    coeffs : np.ndarray
        Two-dimensional float array of shape (n_exponents, n_contractions).

    Raises
    ------
    ValueError
        If the data cannot be matched against the number of exponents.

    """
    coeffs = np.asarray(coeffs_raw, dtype=float)

    if coeffs.ndim == 0:
        # scalar -> single exponent, single contraction
        if n_exponents != 1:
            raise ValueError(f"Coefficient/exponent mismatch for {atom_symbol} (l={angmom})")
        return coeffs.reshape(1, 1)

    if coeffs.ndim == 1:
        # a flat list is one contraction: (n_exponents,) -> (n_exponents, 1)
        if coeffs.shape[0] != n_exponents:
            raise ValueError(
                f"Coefficient/exponent mismatch for {atom_symbol} (l={angmom}): "
                f"{coeffs.shape[0]} coeffs vs {n_exponents} exponents"
            )
        return coeffs.reshape(-1, 1)

    if coeffs.ndim == 2:
        # The BSE layout (rows are contractions) is checked first so that a
        # square matrix is transposed rather than passed through unchanged.
        if coeffs.shape[1] == n_exponents:
            return coeffs.T
        # tolerate data that is already (n_exponents, n_contractions)
        if coeffs.shape[0] == n_exponents:
            return coeffs
        raise ValueError(
            f"Coefficient/exponent mismatch for {atom_symbol} (l={angmom}): "
            f"{coeffs.shape[0]}x{coeffs.shape[1]} vs {n_exponents} exponents"
        )

    raise ValueError(f"Unsupported coefficients ndim={coeffs.ndim} for {atom_symbol} (l={angmom})")


def parse_bse(basis_set, atoms=None):
    """Parse a basis set from the Basis Set Exchange (BSE).

    This function lazily imports the ``basis_set_exchange`` package and converts the
    BSE representation into the same dictionary format returned by the other
    parsers in this module (mapping element symbol to list of (angmom, exps, coeffs)).

    A BSE electron shell is converted according to its ``angular_momentum`` entry:

    * one angular momentum: every row of ``coefficients`` is a contraction of that
      angular momentum (a general contraction), so the shell becomes a single tuple
      whose coefficient array has one column per contraction;
    * several angular momenta (fused shells such as SP): row ``i`` of ``coefficients``
      belongs to angular momentum ``i``, so the shell becomes one tuple per angular
      momentum, all sharing the same exponents array.

    Parameters
    ----------
    basis_set : str
        Name of the basis set to fetch from BSE (e.g., "sto-3g", "6-31g").
    atoms : list, optional
        If provided, only elements in this list will be fetched. The list may contain
        element atomic numbers (ints) or symbols (strs); it is passed directly to
        ``basis_set_exchange.get_basis(..., elements=atoms)``.

    Returns
    -------
    basis_dict : dict
        Dictionary mapping element symbol to list of tuples (angmom, exps, coeffs).
        ``exps`` is a one-dimensional float array and ``coeffs`` is a two-dimensional
        float array of shape (n_exponents, n_contractions).

    Raises
    ------
    ImportError
        If the ``basis_set_exchange`` package is not available.
    ValueError
        If an unexpected or missing layout is encountered in the BSE data.

    """
    # lazy import so that BSE is an optional dependency
    try:
        from basis_set_exchange import get_basis, lut
    except ImportError as exc:  # pragma: no cover - import depends on user env
        raise ImportError(
            "The 'basis_set_exchange' package is required for parse_bse."
            " Install it with 'pip install basis-set-exchange'."
        ) from exc

    bse_res = get_basis(basis_set, elements=atoms)
    if not isinstance(bse_res, dict):
        raise ValueError("Unexpected response from basis_set_exchange.get_basis; expected dict.")

    # Only the "elements" entry holds per-element data; the other top-level keys
    # are metadata and must not be iterated as if they were atomic numbers.
    elements = bse_res.get("elements")
    if not elements:
        raise ValueError(f"No basis data found for '{basis_set}'.")

    output = {}
    for atom_num_str, info in elements.items():
        # BSE keys elements by atomic number (as a string)
        atom_symbol = lut.element_sym_from_Z(int(atom_num_str), normalize=True)

        shells = info.get("electron_shells")
        if not shells:
            raise ValueError(f"No electron shells for element {atom_symbol} in '{basis_set}'.")

        for shell in shells:
            exps_raw = shell.get("exponents")
            if not exps_raw:
                raise ValueError(f"Empty exponents for element {atom_symbol} in '{basis_set}'.")
            exponents = np.asarray(exps_raw, dtype=float)

            # BSE stores angular_momentum as list of ints
            ang_moms = shell.get("angular_momentum")
            if not ang_moms:
                # missing angular momentum or empty list is unexpected
                raise ValueError(f"Unexpected coefficients layout for element {atom_symbol}")

            coeffs_raw = shell.get("coefficients")
            if not coeffs_raw:
                raise ValueError(
                    f"Unexpected coefficients layout for element {atom_symbol}, l={ang_moms[0]}."
                )

            if len(ang_moms) == 1:
                # general contraction: all rows belong to the single angular momentum
                coeffs_per_angmom = [coeffs_raw]
            elif len(coeffs_raw) == len(ang_moms):
                # fused shell (e.g. SP): one coefficient row per angular momentum
                coeffs_per_angmom = coeffs_raw
            else:
                raise ValueError(
                    f"Unexpected coefficients layout for element {atom_symbol}: "
                    f"{len(coeffs_raw)} coefficient rows for {len(ang_moms)} angular momenta."
                )

            for angmom, coeffs_entry in zip(ang_moms, coeffs_per_angmom):
                coeffs = _bse_coeffs_to_array(
                    coeffs_entry, exponents.shape[0], atom_symbol, angmom
                )
                output.setdefault(atom_symbol, []).append((angmom, exponents, coeffs))

    return output


def make_contractions(basis_dict, atoms, coords, coord_types):
"""Return the contractions that correspond to the given atoms for the given basis.

Expand Down
115 changes: 115 additions & 0 deletions notebooks/tutorial/bse_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5ca98ec4",
"metadata": {},
"source": [
"# Basis-Set-Exchange (BSE) support in gbasis\n",
"This notebook demonstrates the `parse_bse()` helper to fetch basis sets directly from the Basis Set Exchange (BSE) and convert them into the `gbasis` internal format."
]
},
{
"cell_type": "markdown",
"id": "2691181e",
"metadata": {},
"source": [
"# 1) Install optional dependency\n",
"\n",
"To use `parse_bse()` you can install the optional dependency:\n",
"\n",
"```\n",
"# install for runtime\n",
"pip install basis-set-exchange\n",
"\n",
"# or as a dev dependency\n",
"pip install -e .[dev]\n",
"```\n",
"\n",
"The implementation uses a lazy import and will raise an informative ImportError if the package is not available."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e693a42",
"metadata": {},
"outputs": [],
"source": [
"# 2) Import libraries and try BSE import\n",
"\n",
"import numpy as np\n",
"from gbasis.parsers import parse_bse\n",
"\n",
"# guarded import to show lazy behaviour in examples\n",
"try:\n",
" from basis_set_exchange import get_basis, lut\n",
" print(\"basis_set_exchange available\")\n",
"except Exception:\n",
" print(\"basis_set_exchange not installed; parse_bse will raise if invoked\")\n"
]
},
{
"cell_type": "markdown",
"id": "9b1a50d9",
"metadata": {},
"source": [
"# 3) Concept: BSE -> gbasis mapping\n",
"\n",
"- BSE JSON uses atomic numbers as keys; they are converted to element symbols with `lut.element_sym_from_Z`.\n",
"- Each element has `electron_shells`, whose entries carry the keys `angular_momentum`, `exponents` and `coefficients`.\n",
"- `parse_bse()` assembles: element symbol -> `[(l, exponents (np.ndarray), coeffs (np.ndarray)), ...]`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "429fd0c5",
"metadata": {},
"outputs": [],
"source": [
"# 4) fetch sto-3g for H and inspect\n",
"\n",
"try:\n",
" b = parse_bse(\"sto-3g\", atoms=[1])\n",
" print(\"Elements:\", list(b.keys()))\n",
" print(\"First H shell:\")\n",
" print(\" angmom:\", b[\"H\"][0][0])\n",
" print(\" exponents:\", b[\"H\"][0][1])\n",
" print(\" coeffs shape:\", b[\"H\"][0][2].shape)\n",
"except Exception as exc:\n",
" print(\"Could not fetch: \", exc)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "54670763",
"metadata": {},
"outputs": [],
"source": [
"# 5) Integrate with gbasis make_contractions (example for H2)\n",
"from gbasis.parsers import parse_bse\n",
"from gbasis.parsers import make_contractions\n",
"import numpy as np\n",
"\n",
"try:\n",
" basis = parse_bse(\"sto-3g\", atoms=[1])\n",
" atoms = [\"H\", \"H\"]\n",
" coords = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.74]])\n",
" # choose spherical coordinate type\n",
" contractions = make_contractions(basis, atoms, coords, \"spherical\")\n",
" print(f\"Created {len(contractions)} contraction shells for H2\")\n",
"except Exception as exc:\n",
" print(\"Example could not run:\", exc)\n"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ dev = [
"sphinx",
"sphinx_autodoc_typehints",
"sphinx-copybutton",
"basis_set_exchange"
]
doc = [
"numpydoc",
Expand Down
56 changes: 55 additions & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Test gbasis.parsers."""
from gbasis.parsers import make_contractions, parse_gbs, parse_nwchem
from gbasis.parsers import make_contractions, parse_gbs, parse_nwchem, parse_bse
import numpy as np
import pytest
from utils import find_datafile
Expand Down Expand Up @@ -758,6 +758,60 @@ def test_parse_gbs_anorcc():
assert np.allclose(test["H"][3][2], np.array([[1.0000000]]))


def test_parse_bse_sto3g():
    """Check that parse_bse returns gbasis-style data for STO-3G hydrogen.

    The test is skipped when the optional ``basis_set_exchange`` package is missing.
    """
    pytest.importorskip("basis_set_exchange")
    parsed = parse_bse("sto-3g", atoms=[1])
    assert "H" in parsed

    hydrogen = parsed["H"]
    # hydrogen must carry at least one s-type shell
    assert 0 in [shell[0] for shell in hydrogen]

    # exponents and coefficients of the first shell are numpy arrays,
    # and the coefficients are always two-dimensional
    first_shell = hydrogen[0]
    exps = first_shell[1]
    coeffs = first_shell[2]
    assert isinstance(exps, np.ndarray)
    assert isinstance(coeffs, np.ndarray)
    assert coeffs.ndim == 2


def test_parse_bse_empty_elements(monkeypatch):
    """Check that parse_bse raises ValueError when BSE returns no element data."""
    import sys
    import types

    # stand-in for the basis_set_exchange package whose get_basis returns nothing
    stub = types.SimpleNamespace(
        get_basis=lambda basis_set, elements=None: {},
        lut=types.SimpleNamespace(element_sym_from_Z=lambda z, normalize=True: "X"),
    )
    monkeypatch.setitem(sys.modules, "basis_set_exchange", stub)

    with pytest.raises(ValueError, match="No basis data found"):
        parse_bse("no-such-basis")


def test_parse_bse_unexpected_coeff_layout(monkeypatch):
    """Check that parse_bse raises a concise error for malformed shell layouts.

    Two malformed shells are exercised: one without an ``angular_momentum``
    entry, and one that has an angular momentum but an empty ``coefficients``
    list. The second case is needed so that the coefficient check itself is
    reached rather than the missing-angular-momentum guard.
    """
    import sys
    import types

    bad_shells = [
        # no angular momentum at all
        {"exponents": [1.0], "coefficients": []},
        # angular momentum present, but no coefficients to pair with it
        {"angular_momentum": [0], "exponents": [1.0], "coefficients": []},
    ]

    for bad_shell in bad_shells:
        fake = types.SimpleNamespace()

        # bind the current shell as a default to avoid late-binding surprises
        def fake_get_basis(basis_set, elements=None, _shell=bad_shell):
            return {"elements": {"1": {"electron_shells": [_shell]}}}

        fake.get_basis = fake_get_basis
        fake.lut = types.SimpleNamespace(element_sym_from_Z=lambda z, normalize=True: "H")

        monkeypatch.setitem(sys.modules, "basis_set_exchange", fake)

        with pytest.raises(ValueError, match="Unexpected coefficients layout"):
            parse_bse("bad-coeffs", atoms=[1])


def test_make_contractions():
"""Test gbasis.contractions.make_contractions."""
basis_dict = parse_nwchem(find_datafile("data_sto6g.nwchem"))
Expand Down