Skip to content

Commit 6d3e5b8

Browse files
Merge pull request #16 from ICAMS/feature/al
add support of active set generation
2 parents 287f728 + 5087809 commit 6d3e5b8

120 files changed

Lines changed: 4914 additions & 2309 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CMakeLists.txt

Lines changed: 54 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,37 +5,44 @@ find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++ PATHS ENV PATH NO_DEFAULT_PA
55
project(pyace C CXX)
66

77
set(CMAKE_CXX_STANDARD 11)
8-
set(CMAKE_CXX_STANDARD_REQUIRED ON)
9-
set(CMAKE_CXX_EXTENSIONS OFF)
10-
8+
set(CMAKE_POSITION_INDEPENDENT_CODE True)
119
# Set source directory
1210
#------------------------------------------------------
1311
set(SOURCE_DIR "src/pyace")
14-
set(ACE_DIR "lib/ace")
15-
12+
set(ACE_ROOT "lib/ace")
13+
set(ACE_SRC ${ACE_ROOT}/src/ace)
14+
set(ACE_INCLUDE ${ACE_ROOT}/src)
15+
16+
set(ACE_EVALUATOR_ROOT ${ACE_ROOT}/ace-evaluator)
17+
set(ACE_EVALUATOR_SRC ${ACE_EVALUATOR_ROOT}/src/ace-evaluator)
18+
set(ACE_EVALUATOR_EXTRA ${ACE_EVALUATOR_ROOT}/src/extra)
19+
set(ACE_EVALUATOR_INCLUDE ${ACE_EVALUATOR_ROOT}/src)
1620
#Headers
1721
#-------------------------------------------------------
1822
#include_directories(${SOURCE_DIR})
1923
include_directories(${SOURCE_DIR}/ace)
2024
include_directories(${SOURCE_DIR}/ace-evaluator)
2125
include_directories(${SOURCE_DIR}/utils)
2226

23-
include_directories(${ACE_DIR}/src/fitting)
24-
include_directories(${ACE_DIR}/ace-evaluator/src)
25-
include_directories(${ACE_DIR}/ace-evaluator/extra)
27+
include_directories(${ACE_INCLUDE})
28+
include_directories(${ACE_EVALUATOR_INCLUDE})
29+
#include_directories(${ACE_DIR}/ace-evaluator/extra)
2630

2731

2832
# Add external libraries
2933
#-------------------------------------------------------
3034

31-
#add_subdirectory(lib/yaml-cpp)
3235
set(YAML_INCLUDE_PATH lib/ace/ace-evaluator/lib/yaml-cpp/include)
3336
include_directories(${YAML_INCLUDE_PATH})
3437

3538
set(WIGNER_PATH lib/ace/utils/wigner-cpp)
36-
set(WIGNER_INCLUDE_PATH ${WIGNER_PATH}/include/wigner)
39+
set(WIGNER_INCLUDE_PATH ${WIGNER_PATH}/include)
3740
include_directories(${WIGNER_INCLUDE_PATH})
3841

42+
set(CNPY_PATH lib/ace/utils/cnpy)
43+
set(CNPY_INCLUDE_PATH lib/ace/utils/)
44+
include_directories(${CNPY_INCLUDE_PATH})
45+
3946
add_subdirectory(lib/pybind11)
4047
if (NOT CMAKE_BUILD_TYPE)
4148
set(CMAKE_BUILD_TYPE Release)
@@ -50,60 +57,60 @@ add_subdirectory(lib/ace)
5057

5158
#Now add sources
5259
#--------------------------------------------------------
53-
set(SOURCES_SHARMONICS "${ACE_DIR}/src/fitting/ace_couplings.cpp"
54-
"${ACE_DIR}/src/fitting/ace_spherical_polar.cpp"
60+
set(SOURCES_SHARMONICS "${ACE_SRC}/ace_couplings.cpp"
61+
"${ACE_SRC}/ace_spherical_polar.cpp"
5562
"${SOURCE_DIR}/ace/ace_spherical_polar_binding.cpp"
5663
)
5764

58-
set(SOURCES_COUPLING "${ACE_DIR}/src/fitting/ace_couplings.cpp"
59-
"${ACE_DIR}/src/fitting/ace_clebsch_gordan.cpp"
65+
set(SOURCES_COUPLING "${ACE_SRC}/ace_couplings.cpp"
66+
"${ACE_SRC}/ace_clebsch_gordan.cpp"
6067
"${SOURCE_DIR}/ace/ace_coupling_binding.cpp")
6168

62-
set(SOURCES_BASIS "${ACE_DIR}/ace-evaluator/src/ace_c_basis.cpp"
63-
"${ACE_DIR}/ace-evaluator/src/ace_radial.cpp"
64-
"${ACE_DIR}/ace-evaluator/src/ace_spherical_cart.cpp"
65-
"${ACE_DIR}/ace-evaluator/src/ace_abstract_basis.cpp"
66-
"${ACE_DIR}/ace-evaluator/src/ace_flatten_basis.cpp"
67-
"${ACE_DIR}/ace-evaluator/src/ships_radial.cpp"
68-
"${ACE_DIR}/src/fitting/ace_b_basis.cpp"
69-
"${ACE_DIR}/src/fitting/ace_b_basisfunction.cpp"
70-
"${ACE_DIR}/src/fitting/ace_clebsch_gordan.cpp"
71-
"${ACE_DIR}/src/fitting/ace_couplings.cpp"
72-
"${ACE_DIR}/src/fitting/ace_yaml_input.cpp"
69+
set(SOURCES_BASIS "${ACE_EVALUATOR_SRC}/ace_c_basis.cpp"
70+
"${ACE_EVALUATOR_SRC}/ace_radial.cpp"
71+
"${ACE_EVALUATOR_SRC}/ace_spherical_cart.cpp"
72+
"${ACE_EVALUATOR_SRC}/ace_abstract_basis.cpp"
73+
"${ACE_EVALUATOR_SRC}/ace_flatten_basis.cpp"
74+
"${ACE_EVALUATOR_SRC}/ships_radial.cpp"
75+
"${ACE_SRC}/ace_b_basis.cpp"
76+
"${ACE_SRC}/ace_b_basisfunction.cpp"
77+
"${ACE_SRC}/ace_clebsch_gordan.cpp"
78+
"${ACE_SRC}/ace_couplings.cpp"
79+
"${ACE_SRC}/ace_yaml_input.cpp"
7380
"${SOURCE_DIR}/ace-evaluator/ace_c_basis_binding.cpp"
7481
"${SOURCE_DIR}/ace-evaluator/ace_bbasis_spec_helper.cpp"
7582
"${SOURCE_DIR}/ace/ace_radial_helper.cpp"
7683
"${SOURCE_DIR}/ace/ace_c_basisfunction_helper.cpp"
7784
"${SOURCE_DIR}/ace/ace_c_basis_helper.cpp"
7885
)
7986

80-
set(SOURCES_EVALUATOR "${ACE_DIR}/ace-evaluator/src/ace_c_basis.cpp"
81-
"${ACE_DIR}/ace-evaluator/src/ace_abstract_basis.cpp"
82-
"${ACE_DIR}/ace-evaluator/src/ace_flatten_basis.cpp"
83-
"${ACE_DIR}/ace-evaluator/src/ace_evaluator.cpp"
84-
"${ACE_DIR}/ace-evaluator/src/ace_recursive.cpp"
85-
"${ACE_DIR}/ace-evaluator/extra/ace_atoms.cpp"
86-
"${ACE_DIR}/ace-evaluator/src/ace_radial.cpp"
87-
"${ACE_DIR}/ace-evaluator/src/ace_spherical_cart.cpp"
88-
"${ACE_DIR}/src/fitting/ace_b_evaluator.cpp"
89-
"${ACE_DIR}/src/fitting/ace_b_basis.cpp"
90-
"${ACE_DIR}/src/fitting/ace_clebsch_gordan.cpp"
91-
"${ACE_DIR}/src/fitting/ace_yaml_input.cpp"
92-
"${ACE_DIR}/src/fitting/ace_couplings.cpp"
87+
set(SOURCES_EVALUATOR "${ACE_EVALUATOR_SRC}/ace_c_basis.cpp"
88+
"${ACE_EVALUATOR_SRC}/ace_abstract_basis.cpp"
89+
"${ACE_EVALUATOR_SRC}/ace_flatten_basis.cpp"
90+
"${ACE_EVALUATOR_SRC}/ace_evaluator.cpp"
91+
"${ACE_EVALUATOR_SRC}/ace_recursive.cpp"
92+
"${ACE_EVALUATOR_EXTRA}/ace_atoms.cpp"
93+
"${ACE_EVALUATOR_SRC}/ace_radial.cpp"
94+
"${ACE_EVALUATOR_SRC}/ace_spherical_cart.cpp"
95+
"${ACE_SRC}/ace_b_evaluator.cpp"
96+
"${ACE_SRC}/ace_b_basis.cpp"
97+
"${ACE_SRC}/ace_clebsch_gordan.cpp"
98+
"${ACE_SRC}/ace_yaml_input.cpp"
99+
"${ACE_SRC}/ace_couplings.cpp"
93100
"${SOURCE_DIR}/ace-evaluator/ace_evaluator_binding.cpp"
94101
)
95102

96-
set(SOURCES_CATOMICENVIRONMENT "${ACE_DIR}/ace-evaluator/extra/ace_atoms.cpp"
103+
set(SOURCES_CATOMICENVIRONMENT "${ACE_EVALUATOR_EXTRA}/ace_atoms.cpp"
97104
"${SOURCE_DIR}/utils/ace_atoms_binding.cpp")
98105

99106

100107
set(SOURCES_CALCULATOR
101-
"${ACE_DIR}/ace-evaluator/src/ace_radial.cpp"
102-
"${ACE_DIR}/ace-evaluator/src/ace_abstract_basis.cpp"
103-
"${ACE_DIR}/ace-evaluator/extra/ace_atoms.cpp"
104-
"${ACE_DIR}/ace-evaluator/src/ace_spherical_cart.cpp"
105-
"${ACE_DIR}/ace-evaluator/extra/ace_calculator.cpp"
106-
"${ACE_DIR}/ace-evaluator/src/ace_evaluator.cpp"
108+
"${ACE_EVALUATOR_SRC}/ace_radial.cpp"
109+
"${ACE_EVALUATOR_SRC}/ace_abstract_basis.cpp"
110+
"${ACE_EVALUATOR_EXTRA}/ace_atoms.cpp"
111+
"${ACE_EVALUATOR_SRC}/ace_spherical_cart.cpp"
112+
"${ACE_EVALUATOR_EXTRA}/ace_calculator.cpp"
113+
"${ACE_EVALUATOR_SRC}/ace_evaluator.cpp"
107114
"${SOURCE_DIR}/utils/ace_calculator_binding.cpp")
108115

109116

@@ -126,9 +133,11 @@ pybind11_add_module(catomicenvironment ${SOURCES_CATOMICENVIRONMENT} )
126133

127134
pybind11_add_module(basis ${SOURCES_BASIS} )
128135
target_link_libraries(basis PRIVATE yaml-cpp-pace)
136+
target_link_libraries(basis PRIVATE cnpy)
129137

130138
pybind11_add_module(evaluator ${SOURCES_EVALUATOR} ${SOURCES_BASIS} )
131139
target_link_libraries(evaluator PRIVATE yaml-cpp-pace)
140+
target_link_libraries(evaluator PRIVATE cnpy)
132141

133142
pybind11_add_module(calculator ${SOURCES_CALCULATOR} )
134143

bin/pace_activeset.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/usr/bin/env python
2+
import argparse
3+
import logging
4+
import os
5+
import psutil
6+
7+
import numpy as np
8+
import pandas as pd
9+
10+
from pyace import BBasisConfiguration, ACEBBasisSet, aseatoms_to_atomicenvironment
11+
from pyace.activelearning import compute_B_projections, compute_active_set, compute_active_set_by_batches, \
12+
compute_A_active_inverse, compute_extrapolation_grade, compute_number_of_functions, \
13+
count_number_total_atoms_per_species_type, save_active_inverse_set
14+
from pyace.preparedata import sizeof_fmt
15+
16+
log = logging.getLogger()
17+
18+
parser = argparse.ArgumentParser(prog="pace_activeset",
19+
description="Utility to compute active set for PACE (.yaml) potential")
20+
21+
# parser.add_argument("potential_file", help="B-basis file name (.yaml)", type=str, nargs='+', default=[])
22+
parser.add_argument("potential_file", help="B-basis file name (.yaml)", type=str)
23+
parser.add_argument("-d", "--dataset", help="Dataset file name, ex.: filename.pckl.gzip", type=str)
24+
parser.add_argument("-f", "--full", help="Compute active set on full (linearized) design matrix",
25+
action='store_true')
26+
parser.add_argument("-b", "--batch_size", help="Batch size (number of structures) considered simultaneously."
27+
"If not provided - all dataset at once is considered",
28+
default="auto", type=str)
29+
parser.add_argument("-g", "--gamma_tolerance", help="Gamma tolerance",
30+
default=1.01, type=float)
31+
parser.add_argument("-i", "--maxvol_iters", help="Number of maximum iteration in MaxVol algorithm",
32+
default=300, type=int)
33+
34+
parser.add_argument("-r", "--maxvol_refinement", help="Number of refinements (epochs)",
35+
default=5, type=int)
36+
37+
parser.add_argument("-m", "--memory-limit", help="Memory limit (i.e. 1GB, 500MB or 'auto')", default="auto", type=str)
38+
39+
args_parse = parser.parse_args()
40+
potential_file = args_parse.potential_file
41+
dataset_filename = args_parse.dataset
42+
batch_size = args_parse.batch_size
43+
gamma_tolerance = args_parse.gamma_tolerance
44+
maxvol_iters = args_parse.maxvol_iters
45+
maxvol_refinement = args_parse.maxvol_refinement
46+
mem_lim = args_parse.memory_limit
47+
is_full = args_parse.full
48+
if mem_lim == "auto":
49+
# determine 80% of available memory
50+
mem_lim = int(0.8 * psutil.virtual_memory().available)
51+
else:
52+
mem_lim = mem_lim.replace("GB", "*2**30").replace("MB", "*2**20")
53+
mem_lim = eval(mem_lim)
54+
55+
data_path = os.environ.get("PACEMAKERDATAPATH", "")
56+
if data_path:
57+
log.info("Data path set to $PACEMAKERDATAPATH = {}".format(data_path))
58+
59+
if os.path.isfile(dataset_filename):
60+
dataset_filename = dataset_filename
61+
elif os.path.isfile(os.path.join(data_path, dataset_filename)):
62+
dataset_filename = os.path.join(data_path, dataset_filename)
63+
else:
64+
raise RuntimeError("File {} not found".format(dataset_filename))
65+
66+
df = pd.read_pickle(dataset_filename, compression="gzip")
67+
df.reset_index(drop=True, inplace=True)
68+
log.info("Number of structures: {}".format(len(df)))
69+
log.info("Potential file: ".format(potential_file))
70+
71+
bconf = BBasisConfiguration(potential_file)
72+
bbasis = ACEBBasisSet(bconf)
73+
nfuncs = compute_number_of_functions(bbasis)
74+
if is_full:
75+
n_projections = [p * bbasis.map_embedding_specifications[st].ndensity for st, p in enumerate(nfuncs)]
76+
else: # linear
77+
n_projections = nfuncs
78+
79+
elements_to_index_map = bbasis.elements_to_index_map
80+
elements_name = bbasis.elements_name
81+
cutoffmax = bbasis.cutoffmax
82+
83+
ATOMIC_ENV_COLUMN = "atomic_env"
84+
85+
rebuild_atomic_env = False
86+
if ATOMIC_ENV_COLUMN not in df.columns:
87+
rebuild_atomic_env = True
88+
else:
89+
# check if cutoff is not smaller than requested now
90+
try:
91+
metadata_kwargs = df.metadata_dict[ATOMIC_ENV_COLUMN + "_kwargs"]
92+
metadata_cutoff = metadata_kwargs["cutoff"]
93+
if metadata_cutoff < cutoffmax:
94+
log.warning("WARNING! Column {} was constructed with smaller cutoff ({}A) "
95+
"that necessary now ({}A). "
96+
"Neighbourlists will be re-built".format(ATOMIC_ENV_COLUMN, metadata_cutoff,
97+
cutoffmax))
98+
rebuild_atomic_env = True
99+
else:
100+
log.info("Column '{}': existing cutoff ({}A) >= "
101+
"requested cutoff ({}A), skipping...".format(ATOMIC_ENV_COLUMN, metadata_cutoff,
102+
cutoffmax))
103+
rebuild_atomic_env = False
104+
105+
except KeyboardInterrupt as e:
106+
raise e
107+
except Exception as e:
108+
log.info("Could not extract cutoff metadata "
109+
"for column '{}' (error: {}). Please ensure the valid cutoff for "
110+
"precomputed neighbourlists".format(ATOMIC_ENV_COLUMN, e))
111+
rebuild_atomic_env = False
112+
113+
if rebuild_atomic_env:
114+
log.info("Constructing {} column, cutoffmax={}, elements_to_index_map={}".format(ATOMIC_ENV_COLUMN, cutoffmax,
115+
elements_to_index_map))
116+
df[ATOMIC_ENV_COLUMN] = df["ase_atoms"].apply(aseatoms_to_atomicenvironment,
117+
cutoff=cutoffmax, elements_mapper_dict=elements_to_index_map)
118+
119+
atomic_env_list = df[ATOMIC_ENV_COLUMN]
120+
structure_ind_list = df.index
121+
total_number_of_atoms_per_species_type = count_number_total_atoms_per_species_type(atomic_env_list)
122+
123+
number_of_projection_entries = 0
124+
required_active_set_memory = 0
125+
for st in total_number_of_atoms_per_species_type.keys():
126+
log.info("\tElement: {}, # atoms: {}, # B-func: {}, # projections: {}".format(elements_name[st],
127+
total_number_of_atoms_per_species_type[
128+
st],
129+
nfuncs[st], n_projections[st]
130+
))
131+
number_of_projection_entries += total_number_of_atoms_per_species_type[st] * n_projections[st]
132+
required_active_set_memory += n_projections[st] ** 2
133+
134+
required_projections_memory = number_of_projection_entries * 8 # float64
135+
required_active_set_memory *= 8 # in bytes, float64
136+
log.info("Required memory to store complete dataset projections: {}".format(sizeof_fmt(required_projections_memory)))
137+
log.info("Required memory to store active set: {}".format(sizeof_fmt(required_active_set_memory)))
138+
139+
if batch_size == "auto":
140+
log.info("Automatic batch_size determination")
141+
log.info("Memory limit: {}".format(sizeof_fmt(mem_lim)))
142+
if 2 * required_projections_memory + required_active_set_memory < mem_lim:
143+
batch_size = None
144+
else:
145+
nsplits = int(np.ceil(2 * required_projections_memory // (mem_lim - required_active_set_memory)))
146+
batch_size = int(np.round(len(atomic_env_list) / nsplits))
147+
elif batch_size == "None" or batch_size == "none":
148+
batch_size = None
149+
else:
150+
batch_size = int(batch_size)
151+
152+
if is_full:
153+
active_set_inv_filename = potential_file.replace(".yaml", ".asi.nonlinear")
154+
log.info("FULL (non-linear) matrix will be used for active set calculation")
155+
else:
156+
active_set_inv_filename = potential_file.replace(".yaml", ".asi")
157+
log.info("LINEAR matrix will be used for active set calculation")
158+
159+
if batch_size is None:
160+
# single shot MaxVol
161+
log.info("Single-run (no batch_size is provided)")
162+
log.info("Compute B-projections")
163+
A0_proj_dict = compute_B_projections(bbasis, atomic_env_list, is_full=is_full)
164+
log.info("B-projections computed:")
165+
for st, A0_proj in A0_proj_dict.items():
166+
log.info("\tElement: {}, B-projections shape: {}".format(elements_name[st], A0_proj.shape))
167+
168+
log.info("Compute active set (using MaxVol algorithm)")
169+
A_active_set_dict = compute_active_set(A0_proj_dict, tol=gamma_tolerance, max_iters=maxvol_iters, verbose=True)
170+
log.info("Compute pseudoinversion of active set")
171+
A_active_inverse_set = compute_A_active_inverse(A_active_set_dict)
172+
log.info("Done")
173+
gamma_dict = compute_extrapolation_grade(A0_proj_dict, A_active_inverse_set)
174+
gamma_max = {k: gg.max() for k, gg in gamma_dict.items()}
175+
176+
for st, AS_inv in A_active_inverse_set.items():
177+
log.info("\tElement: {}, Active set inv. shape: {}, gamma_max: {:.3f}".format(elements_name[st], AS_inv.shape,
178+
gamma_max[st]))
179+
log.info("Saving Active Set Inversion (ASI) to {}".format(active_set_inv_filename))
180+
with open(active_set_inv_filename, "wb") as f:
181+
np.savez(f, **{elements_name[st]: v for st, v in A_active_inverse_set.items()})
182+
log.info("Saving done to {} ({})".format(active_set_inv_filename, sizeof_fmt(active_set_inv_filename)))
183+
else:
184+
# multiple round maxvol
185+
log.info("Approximated MaxVol by batches")
186+
log.info("Batch size: {}".format(batch_size))
187+
nsplits = len(atomic_env_list) // batch_size
188+
atomic_env_batches = np.array_split(atomic_env_list, nsplits)
189+
atomic_env_batches = [b.values for b in atomic_env_batches]
190+
structure_env_batches = np.array_split(structure_ind_list, nsplits)
191+
structure_env_batches = [b.values for b in structure_env_batches]
192+
log.info("Number of batches: {}".format(len(atomic_env_batches)))
193+
194+
log.info("Compute approximate active set (using batched MaxVol algorithm)")
195+
(best_gamma, best_active_sets_dict, _) = \
196+
compute_active_set_by_batches(
197+
bbasis,
198+
atomic_env_batches=atomic_env_batches,
199+
structure_ind_batches=structure_env_batches,
200+
gamma_tolerance=gamma_tolerance,
201+
maxvol_iters=maxvol_iters,
202+
n_refinement_iter=maxvol_refinement,
203+
save_interim_active_set=True,
204+
is_full=is_full
205+
)
206+
log.info("Compute pseudoinversion of active set")
207+
A_active_inverse_set = compute_A_active_inverse(best_active_sets_dict)
208+
for st, AS_inv in A_active_inverse_set.items():
209+
log.info("\tElement: {}, Active set inv. shape: {}, gamma_max: {:.3f}".format(elements_name[st], AS_inv.shape,
210+
best_gamma[st]))
211+
log.info("Saving Active Set Inversion (ASI) to {}".format(active_set_inv_filename))
212+
save_active_inverse_set(active_set_inv_filename, A_active_inverse_set, elements_name=elements_name)
213+
log.info("Saving done to {} ({})".format(active_set_inv_filename, sizeof_fmt(active_set_inv_filename)))

0 commit comments

Comments
 (0)