diff --git a/.github/workflows/generate_reference_hashes.yml b/.github/workflows/generate_reference_hashes.yml new file mode 100644 index 00000000..6c23dde8 --- /dev/null +++ b/.github/workflows/generate_reference_hashes.yml @@ -0,0 +1,47 @@ +# This workflow will install Python dependencies , analysator and generate verification set to /wrk-kappa/group/spacephysics/analysator/CI/verification_sets/ under new folder based on commit id +name: generate_reference_hashes + +on: + workflow_dispatch: + + +jobs: + + turso_system: + if: github.repository_owner == 'fmihpc' + runs-on: carrington + timeout-minutes: 60 + strategy: + fail-fast: false + max-parallel: 2 + matrix: + extras: ["testpackage"] + steps: + - uses: actions/checkout@v4 + timeout-minutes: 5 + - name: Install uv + uses: astral-sh/setup-uv@v6 + timeout-minutes: 5 + - name: Install dependencies + timeout-minutes: 5 + run: | + export TMPDIR=$RUNNER_TEMP + export UV_LINK_MODE=copy + module purge + module load Python/3.10.4-GCCcore-11.3.0 + uv venv CI_env + . CI_env/bin/activate + uv pip install --editable ../analysator[${{ matrix.extras }}] + - name: Produce reference hashes + run: | + export TMPDIR=$RUNNER_TEMP + module purge + module load Python/3.10.4-GCCcore-11.3.0 + . CI_env/bin/activate + + DATAPATH="/turso/group/spacephysics/analysator/CI/verification_sets_hashes/$(git rev-parse HEAD)" + mkdir $DATAPATH + touch $DATAPATH/.lockfile + srun --constraint="carrington" --mem=5G -c 1 bash -c "ml Python/3.10; . 
CI_env/bin/activate; python testpackage/testpackage_hashes.py -g $DATAPATH" + + rm $DATAPATH/.lockfile diff --git a/.github/workflows/test_compare_images.yml b/.github/workflows/test_compare_images.yml index 5bc71764..dcc554b5 100644 --- a/.github/workflows/test_compare_images.yml +++ b/.github/workflows/test_compare_images.yml @@ -13,9 +13,55 @@ concurrency: jobs: + hashes_test: + if: github.repository_owner == 'fmihpc' + runs-on: carrington + strategy: + fail-fast: false + matrix: + extras: ["all"] + steps: + - uses: actions/checkout@v4 + timeout-minutes: 5 + - name: Install uv + uses: astral-sh/setup-uv@v6 + timeout-minutes: 5 + - name: Install dependencies + timeout-minutes: 5 + run: | + module load GCCcore/11.3.0 + export TMPDIR=$RUNNER_TEMP + uv venv CI_env + . CI_env/bin/activate + uv pip install cmake + uv pip install --editable ../analysator[${{ matrix.extras }}] + uv pip install -r requirements-backend.txt + - name: Trial imports + run: | + export TMPDIR=$RUNNER_TEMP + . CI_env/bin/activate + python -c 'import analysator as pt' + python -c 'import analysator as pt;pt.plot.__dict__' + python -c 'import analysator as pt;pt.calculations.__dict__' + python -c 'import analysator as pt;pt.vlsvfile.__dict__' + python -c 'import analysator as pt;pt.miscellaneous.__dict__' + + - name: Testing backend and vlsvreader + run: | + . CI_env/bin/activate + verf_loc="/turso/group/spacephysics/analysator/CI/verification_sets_hashes/" + verfset=$(ls -lth $verf_loc | grep ^d | head -n1 | grep -Po '\w+$') + + if [[ -f $verf_loc/$verfset/.lockfile ]]; then + echo ".lockfile found in $verf_loc/$verfset, not comparing, something probably went wrong removing the lockfile" + exit 1 + fi + srun --constraint="carrington" --mem=5G -c 1 bash -c "ml purge; ml Python/3.10; . 
CI_env/bin/activate; python ./testpackage/testpackage_hashes.py -c '$verf_loc/$verfset/'" + turso_system: if: github.repository_owner == 'fmihpc' runs-on: carrington + needs: hashes_test timeout-minutes: 120 strategy: fail-fast: false diff --git a/.github/workflows/test_compare_images_full.yml b/.github/workflows/test_compare_images_full.yml index 6ec2da7f..dd1ca18b 100644 --- a/.github/workflows/test_compare_images_full.yml +++ b/.github/workflows/test_compare_images_full.yml @@ -12,6 +12,52 @@ on: jobs: + hashes_test: + if: github.repository_owner == 'fmihpc' + runs-on: carrington + strategy: + fail-fast: false + matrix: + extras: ["all"] + steps: + - uses: actions/checkout@v4 + timeout-minutes: 5 + - name: Install uv + uses: astral-sh/setup-uv@v6 + timeout-minutes: 5 + - name: Install dependencies + timeout-minutes: 5 + run: | + module load GCCcore/11.3.0 + export TMPDIR=$RUNNER_TEMP + uv venv CI_env + . CI_env/bin/activate + uv pip install cmake + uv pip install --editable ../analysator[${{ matrix.extras }}] + uv pip install -r requirements-backend.txt + - name: Trial imports + run: | + export TMPDIR=$RUNNER_TEMP + . CI_env/bin/activate + python -c 'import analysator as pt' + python -c 'import analysator as pt;pt.plot.__dict__' + python -c 'import analysator as pt;pt.calculations.__dict__' + python -c 'import analysator as pt;pt.vlsvfile.__dict__' + python -c 'import analysator as pt;pt.miscellaneous.__dict__' + + - name: Testing backend and vlsvreader + run: | + . CI_env/bin/activate + verf_loc="/turso/group/spacephysics/analysator/CI/verification_sets_hashes/" + verfset=$(ls -lth $verf_loc | grep ^d | head -n1 | grep -Po '\w+$') + + if [[ -f $verf_loc/$verfset/.lockfile ]]; then + echo ".lockfile found in $verf_loc/$verfset, not comparing, something probably went wrong removing the lockfile" + exit 1 + fi + srun --constraint="carrington" --mem=5G -c 1 bash -c "ml purge; ml Python/3.10; . 
CI_env/bin/activate; python ./testpackage/testpackage_hashes.py -c '$verf_loc/$verfset/'" + + turso_system: if: github.repository_owner == 'fmihpc' runs-on: carrington diff --git a/.github/workflows/test_python_turso.yml b/.github/workflows/test_python_turso.yml index 068a7f41..d4a779c7 100644 --- a/.github/workflows/test_python_turso.yml +++ b/.github/workflows/test_python_turso.yml @@ -12,6 +12,10 @@ on: - cron: '0 8 * * MON' workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + jobs: turso_system: @@ -39,6 +43,7 @@ jobs: . CI_env/bin/activate uv pip install cmake uv pip install --editable ../analysator[${{ matrix.extras }}] + uv pip install -r requirements-backend.txt - name: Trial imports run: | export TMPDIR=$RUNNER_TEMP diff --git a/testpackage/testpackage_hashes.py b/testpackage/testpackage_hashes.py new file mode 100644 index 00000000..61efb3e0 --- /dev/null +++ b/testpackage/testpackage_hashes.py @@ -0,0 +1,497 @@ +import analysator as pt +import numpy as np +import vlsvrs +import os +import hashlib +import pickle +import importlib +import argparse + +datalocation = "/turso/group/spacephysics/analysator/CI/analysator-test-data/vlasiator/" +files = [ + "3D/FID/bulk1/bulk1.0000995.vlsv", + "3D/FHA/bulk1/bulk1.0000990.vlsv", + "2D/BGA/zero_ehall_layers_23/bulk.0000380.vlsv", +] + +parser = argparse.ArgumentParser( + prog="ciTester", + description="Used for generating and comparing hashes from analysator output for CI testing.", +) +parser.add_argument( + "--generate", "-g", help="Generate hashdump into given path", const=str, nargs="?" +) +parser.add_argument( + "--compare", "-c", help="Compare against given hashdump", const=str, nargs="?" 
class Tester:
    """Generate, store and compare SHA-256 hashes of VLSV reader output.

    Hashes are kept per backend in ``hashes_dict_python`` and
    ``hashes_dict_rust`` with the layout
    ``{filename: {function_name: {argkey: [sha256_hexdigest, opsname]}}}``.
    """

    def __init__(self):
        self.filename = None
        # Reader currently selected by setHashTarget().
        self.vlsvobj = None
        # Readers are created by loadobj(); None means "not loaded yet".
        self.vlsvobj_rust = None
        self.vlsvobj_python = None
        self.hashes_dict_rust = {}
        self.hashes_dict_python = {}

    def changeFile(self, filename):
        """Set the file that subsequent loadobj()/hash() calls refer to."""
        self.filename = filename

    def loadPickle(self, file):
        """Return the object unpickled from the open binary file *file*."""
        # NOTE(review): pickle must only be used on trusted dumps; unpickling
        # can execute arbitrary code.
        return pickle.load(file)

    def dumpPickle(self, file):
        """Pickle both hash dictionaries into the open binary file *file*."""
        hashdict = {"python": self.hashes_dict_python, "rust": self.hashes_dict_rust}
        pickle.dump(hashdict, file)

    def dumpIntoFile(self, indict, dumpname, generate_path):
        """Write *indict* as a text hash dump and verify it reads back equal.

        Args:
            indict: hash dictionary in the layout described on the class.
            dumpname: file name of the dump inside *generate_path*.
            generate_path: existing directory to write into.

        Raises:
            IOError: if the written file does not parse back to *indict*.
        """
        path_to_file = os.path.join(generate_path, dumpname)
        if os.path.isfile(path_to_file):
            print(f"::warning:: {dumpname} found in {generate_path}, overwriting!")

        lines = []
        for filename, funcdict in indict.items():
            lines.append(f"File: {filename}\n")
            for funccall, hashdict in funcdict.items():
                lines.append(f"\tFunction: {funccall}\n")
                for arg, hash_and_op in hashdict.items():
                    digest = hash_and_op[0]
                    opsname = hash_and_op[1]
                    lines.append(f"\t\t{arg:<30} {digest} {opsname}\n")

        with open(path_to_file, "w") as file:
            file.writelines(lines)

        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if self.loadFromFile(dumpname, generate_path) != indict:
            raise IOError(f"Hash dump {path_to_file} does not read back to the dumped dictionary")

    def loadFromFile(self, dumpname, compare_path):
        """Parse a text hash dump written by dumpIntoFile().

        Args:
            dumpname: file name of the dump inside *compare_path*.
            compare_path: directory containing the dump.

        Returns:
            The parsed hash dictionary, or ``False`` if the file does not exist.

        Raises:
            IOError: if the file content does not follow the dump format.
        """
        import re  # local import: keeps this method self-contained

        path_to_file = os.path.join(compare_path, dumpname)
        if not os.path.isfile(path_to_file):
            print(f"::warning::Tried to load file {path_to_file} but no file found, skipping this dictionary")
            return False

        # A hash line is "<argkey> <64 hex chars> <opsname>". Anchoring on the
        # digest lets argkey and opsname contain spaces (e.g. "['fg_b', 0]").
        hash_line = re.compile(r"^(?P<arg>.*?)\s+(?P<digest>[0-9a-f]{64})\s+(?P<ops>.*)$")

        outdict = {}
        filename = None
        function = None
        with open(path_to_file, "r") as file:
            for raw_line in file:
                line = raw_line.rstrip("\n")
                stripped = line.strip()
                if not stripped:
                    continue

                if line.startswith("File:"):
                    filename = line[len("File:"):].strip()
                    outdict.setdefault(filename, {})
                    function = None
                    continue

                if stripped.startswith("Function:"):
                    if filename is None:
                        raise IOError("Invalid format of the input file")
                    function = stripped[len("Function:"):].strip()
                    # There should not be multiples of a function within one file entry.
                    outdict[filename][function] = {}
                    continue

                match = hash_line.match(stripped)
                if match is None or function is None:
                    # Neither header nor hash line (or a hash line without its
                    # headers): the dump is badly formatted.
                    raise IOError("File is likely not formatted correctly")
                outdict[filename][function][match["arg"]] = [match["digest"], match["ops"]]
        return outdict

    def loadobj(self, backend=None):
        """Open ``self.filename`` with the rust and/or python reader.

        Args:
            backend: ``"rust"``, ``"python"`` (case-insensitive) or a falsy
                value to load both.
        """
        wanted = backend.lower() if backend else None
        if wanted is None or wanted == "rust":
            self.vlsvobj_rust = vlsvrs.VlsvFile(self.filename)
        if wanted is None or wanted == "python":
            self.vlsvobj_python = pt.vlsvfile.VlsvReader(self.filename)

    def setHashTarget(self, backend):
        """Select which loaded reader hash() uses when ``both`` is false."""
        if backend == "rust":
            self.vlsvobj = self.vlsvobj_rust
        elif backend == "python":
            self.vlsvobj = self.vlsvobj_python
        else:
            print("None set, give valid backend")

    @staticmethod
    def _apply_ops(retval, op, opargs):
        """Apply post-processing operation(s) to *retval* and return the result.

        Each op is first looked up as a method of the current value and called
        with its opargs entry. If no such method exists, the op is resolved as
        a callable, a dotted name ("module.function") or a builtin name, and is
        called with the current value as its only argument.
        """
        import builtins  # local import: only needed for the builtin fallback

        if isinstance(op, list):
            ops, ops_args = op, opargs
        else:
            ops, ops_args = [op], [opargs]

        for i, name in enumerate(ops):
            try:
                method = getattr(retval, name)
            except (AttributeError, TypeError):
                method = None
            if method is not None:
                retval = method(*ops_args[i])
                continue

            if callable(name):
                fun = name
            else:
                module_name, _, attr = name.rpartition(".")
                module = importlib.import_module(module_name) if module_name else builtins
                try:
                    fun = getattr(module, attr)
                except AttributeError as e:
                    raise AttributeError(f"Did not find func {name} to operate with: {e}") from e
            # The caller's opargs list is deliberately left untouched.
            retval = fun(retval)
        return retval

    def _hash_call(self, vlsvobj, hashdict, func, args, op, opargs,
                   flatten, sort, argkey_name, novlsv):
        """Run one call, hash its result and store the hash in *hashdict*."""
        argkey = str(argkey_name + "_NOTARG") if argkey_name else str(args)
        opsname = "_" + str(op) + "_" + str(opargs)

        # Either call `func` directly or look it up as a method of the reader.
        target = func if novlsv else getattr(vlsvobj, func)
        if isinstance(args, dict):
            retval = target(**args)
        elif isinstance(args, list):
            retval = target(*args)
        else:
            raise IOError(f"Wrong args type: {type(args)} {args}")

        # Ops are only applied when both op and opargs are given (truthy).
        if op and opargs:
            retval = self._apply_ops(retval, op, opargs)

        retval = np.array(retval)
        if sort:
            retval.sort()

        funname = func.__name__ if callable(func) else func
        # Keyed by funname so that repeated calls with a callable accumulate
        # entries instead of resetting the bucket.
        entries = hashdict.setdefault(self.filename, {}).setdefault(funname, {})

        bytedata = retval.tobytes()
        if not flatten:
            # Include the shape so differently shaped but byte-equal data differ.
            bytedata += np.array(retval.shape).tobytes()
        entries[argkey] = [hashlib.sha256(bytedata).hexdigest(), opsname]

    def hash(
        self,
        func,
        args,
        op=None,
        opargs=None,
        both=False,
        loop=False,
        flatten=False,
        sort=False,
        argkey_name=None,
        novlsv=False,
    ):
        """Call *func*, hash the returned data and record the hash.

        Args:
            func: method name of the reader, or a callable when *novlsv* is true.
            args: ``list`` (positional) or ``dict`` (keyword) arguments; with
                *loop* an iterable of such argument sets.
            op: optional operation name/callable, or list of them, applied to
                the return value before hashing (see ``_apply_ops``).
            opargs: arguments for *op*; a list of argument lists if *op* is a list.
            both: hash with the rust and the python reader instead of the
                reader chosen by setHashTarget().
            loop: treat *args* as a sequence of argument sets.
            flatten: if true, the array shape is not mixed into the hash.
            sort: sort the resulting array in place before hashing.
            argkey_name: key (suffixed with ``_NOTARG``) to store the hash
                under instead of ``str(args)``.
            novlsv: call *func* itself rather than a reader method.

        Raises:
            SystemError: if no reader was selected/loaded and *both* is false.
            IOError: if an argument set is neither list nor dict.
        """
        if both:
            targets = [
                (self.vlsvobj_rust, self.hashes_dict_rust),
                (self.vlsvobj_python, self.hashes_dict_python),
            ]
        elif self.vlsvobj is not None and self.vlsvobj is self.vlsvobj_python:
            targets = [(self.vlsvobj, self.hashes_dict_python)]
        elif self.vlsvobj is not None and self.vlsvobj is self.vlsvobj_rust:
            targets = [(self.vlsvobj, self.hashes_dict_rust)]
        else:
            raise SystemError("vlsvobj was not loaded, something went wrong")

        arg_sets = args if loop else [args]
        for vlsvobj, hashdict in targets:
            for call_args in arg_sets:
                self._hash_call(vlsvobj, hashdict, func, call_args, op, opargs,
                                flatten, sort, argkey_name, novlsv)

    def compare(self, funcpy, argspy, funcrust, argsrust):
        """Check that a python and a rust reader call return equal dictionaries.

        On success the python result is also hashed (for both backends) under
        the ``read_velocity_cells`` key so it can be compared to a reference.

        Returns:
            ``True`` if the dictionaries match.

        Raises:
            SystemError: if sizes or values differ.
            KeyError: if the key sets differ.
            NotImplementedError: if either return value is not a dict.
        """
        retval_py = getattr(self.vlsvobj_python, funcpy)(**argspy)
        retval_rust = getattr(self.vlsvobj_rust, funcrust)(**argsrust)

        if not (isinstance(retval_py, dict) and isinstance(retval_rust, dict)):
            raise NotImplementedError

        print(
            "Checking dictionaries between vlsvrs and python from function call",
            "\n (python):",
            str(funcpy),
            "\n (rust):",
            str(funcrust),
            "\nThis may take a moment!",
        )

        if len(retval_py) != len(retval_rust) and len(retval_py) != 0:
            raise SystemError(
                "dictionaries returned by the readers differ in size "
                f"(python: {len(retval_py)}, rust: {len(retval_rust)})"
            )

        for key, value in retval_py.items():
            if key not in retval_rust:
                raise KeyError(f"key {key!r} returned by python is missing from the dictionary returned by vlsvrs")
            if retval_rust[key] == value:
                continue
            raise SystemError("returned dictionary values between vlsvreader and vlsvrs do not match")

        # Set difference replaces the quadratic list.remove() bookkeeping.
        if set(retval_rust) - set(retval_py):
            raise KeyError("returned dictionry from vlsvrs contains keys not present in dictonary returned by python.")

        # Hash the agreed value as well, in case both readers change in the
        # same way relative to the reference.
        self.hash(
            list,
            [retval_py.items()],
            novlsv=True,
            both=True,
            flatten=True,
            argkey_name="read_velocity_cells",
        )
        return True

    def interpolationtest2d(self, varname):
        """Hash ``read_interpolated_variable`` of *varname* on a 1000x1000 grid in one z-plane."""
        N = 1000
        delta = 60e6
        xmin = 45.0e6
        ymin = -37.51e6
        xcoords = np.linspace(xmin, xmin + delta, N)
        ycoords = np.linspace(ymin, delta + ymin, N)
        X, Y, Z = np.meshgrid(xcoords, ycoords, np.array([-0.25e6]))

        # One (x, y, z) row per grid point.
        coords = np.column_stack((X.ravel(), Y.ravel(), Z.ravel()))

        self.hash(
            "read_interpolated_variable",
            [varname, coords],
            argkey_name=varname,
            flatten=False,
        )

    def interpolationtest3d(self):
        """Hash ``read_interpolated_variable`` for three proton variables at fixed coordinate sets."""
        RE = 6371e3
        coords = [
            [5 * RE, RE, 0.5 * RE],
            np.array([[10 * RE, RE, 0.5 * RE], [5 * RE, RE, 0.1 * RE]]),
            np.array([[5 * RE, RE, 0.5 * RE], [8 * RE, RE, 0.1 * RE]]),
        ]
        variables = ("proton/vg_rho", "proton/vg_v", "proton/vg_ptensor")
        for i, coord in enumerate(coords):
            for variable in variables:
                self.hash(
                    "read_interpolated_variable",
                    [variable, coord],
                    argkey_name=f"{variable}_{i}",
                    flatten=False,
                )

    def compareReaders(self, variable_map=None):
        """Check that every rust hash equals the corresponding python hash.

        Args:
            variable_map: optional ``{rust_argkey: python_argkey}`` mapping for
                calls whose argument keys differ between the backends.

        Returns:
            ``True`` if all hashes match.

        Raises:
            SystemError: on the first differing hash.
            KeyError: if the python hashes lack an entry the rust hashes have.
        """
        print("comparing hashes between vlsvrs and vlsvreader")

        # Function names may differ between backends: rust call -> python call.
        key_map_rust_to_py = {
            "read_variable_raw": "read_variable",
            "read_variable": "read_variable",
            "list": "list",
        }
        for file, rust_funcs in self.hashes_dict_rust.items():
            print(f"------{file}------")
            for key, rust_dict in rust_funcs.items():
                py_key = key_map_rust_to_py.get(key, key)
                try:
                    py_dict = self.hashes_dict_python[file][py_key]
                except KeyError:
                    raise KeyError(f"No python hashes for {py_key} in {file} to compare rust call {key} against") from None
                for argcall, rust_entry in rust_dict.items():
                    py_argcall = argcall
                    if variable_map and argcall in variable_map:
                        py_argcall = variable_map[argcall]
                    if py_argcall not in py_dict:
                        raise KeyError(f"No python hash for call {py_argcall} of {py_key} in {file}")
                    if rust_entry[0] != py_dict[py_argcall][0]:
                        print(rust_entry[0], py_dict[py_argcall][0])
                        raise SystemError(f"Hashes do not match for call {argcall}!")
        return True

    def compareAgainstRef(self, hashdump, compare_path, hashdict=None):
        """Compare generated hashes against a reference dump, reporting all differences.

        Args:
            hashdump: dump file name; ``hashdump_python.txt`` and
                ``hashdump_rust.txt`` select the matching internal dictionary
                when *hashdict* is not given.
            compare_path: directory containing the reference dump.
            hashdict: generated hashes to compare; defaults as described above.

        Returns:
            ``True`` if every reference entry is present with an equal hash
            (extra generated entries only produce warnings), else ``False``.

        Raises:
            IOError: if no dictionary is available for *hashdump* or the
                reference could not be loaded.
        """
        refDict = self.loadFromFile(hashdump, compare_path)
        # Full diagnostics are wanted, so differences are collected in a
        # return value instead of raising on the first one.
        retval = True
        if hashdict is None:
            if hashdump == "hashdump_python.txt":
                hashdict = self.hashes_dict_python
            elif hashdump == "hashdump_rust.txt":
                hashdict = self.hashes_dict_rust
            else:
                raise IOError(f"::error:: ciTester does not have hash dictionary to compare against {hashdump}.")

        if not refDict:
            raise IOError(f"::error::Loading dict from file {hashdump} failed")

        if refDict == hashdict:
            print("Hash dictionaries match")
            return retval

        unique_hash = set(hashdict) - set(refDict)
        if unique_hash:
            print(f"::warning::Generated hashset has file entries {unique_hash} which are not found in the reference set.")

        for file, ref_funcs in refDict.items():
            if file not in hashdict:
                print(f"::error::Reference has entry for {file} but could not find entry for this file in generated hashset.")
                retval = False
                continue  # nothing to compare for this file
            gen_funcs = hashdict[file]

            uniq_func_gen = set(gen_funcs) - set(ref_funcs)
            if uniq_func_gen:
                print(f"::warning::Generated hashset has function entries {uniq_func_gen} which are not found in reference set.")

            for func, argdict in ref_funcs.items():
                if func not in gen_funcs:
                    retval = False
                    print(f"::error::Call to func {func} missing from generated hashes.")
                    continue  # nothing to compare for this function

                # Compare (argkey, digest) pairs; the opsname is ignored.
                ref_pairs = {(arg, entry[0]) for arg, entry in argdict.items()}
                gen_pairs = {(arg, entry[0]) for arg, entry in gen_funcs[func].items()}
                diff_ref = ref_pairs - gen_pairs
                diff_gen = gen_pairs - ref_pairs

                if diff_gen and not diff_ref:
                    print(f"::warning:: Generated hashset contains unique entries {diff_gen}!")

                if diff_ref:
                    retval = False
                    print(f"::error:: Difference in the hashes or args!\n reference: {diff_ref} \n generated: {diff_gen}!")

        return retval
"vg_rhoq", + "proton/vg_rho", + "proton/vg_v", + ] # fg_variable read issue with read_variable + variables_to_test_nonraw = ["fg_b", "fg_v"] + + pylist = ciTester.vlsvobj_python.get_variables() + rustlist = ciTester.vlsvobj_rust.list_variables() + + variables = [ + [var] for var in variables_to_test if (var in pylist and var in rustlist) + ] + nonraw_vars = [ + [var, 0] + for var in variables_to_test_nonraw + if (var in pylist and var in rustlist) + ] + + ############ Make hash rust########### + ciTester.setHashTarget("rust") + nonraw_to_raw_map = {str(var): str([var[0]]) for var in nonraw_vars} + if "3D" in filename: + ciTester.hash("read_variable_raw", variables, loop=True, flatten=True) + ciTester.hash("read_variable", nonraw_vars, loop=True, flatten=True) + + ############ Make hash python########### + ciTester.setHashTarget("python") + # read_interpolated_variable + if "proton/vg_v" in pylist: + ciTester.interpolationtest2d("proton/vg_v") + ciTester.interpolationtest3d() + + ########### Cut3d########### + if "3D" in ciTester.filename: + RE = 6371.0e3 + box = [-45, +20, -30, +30, -1, +1] + xmin, xmax, ymin, ymax, zmin, zmax = np.array(box) * RE + ciTester.hash( + pt.calculations.cut3d, + { + "vlsvReader": ciTester.vlsvobj, + "xmin": xmin, + "xmax": xmax, + "ymin": ymin, + "ymax": ymax, + "zmin": zmin, + "zmax": zmax, + "variable": "vg_connection", + }, + novlsv=True, + argkey_name="vg_connection", + flatten=False, + ) + + ############ read_variable########### + variables.extend([[var[0]] for var in nonraw_vars]) # prob some prettier way than looping through it all but it's not a big list + ciTester.hash("read_variable", variables, loop=True, flatten=True) + + ##############GENERATION OF REFERENCE DATA############### + if generate_path: + if ciTester.hashes_dict_python: + ciTester.dumpIntoFile( + ciTester.hashes_dict_python, "hashdump_python.txt", generate_path + ) + os.system(f"cat {os.path.join(generate_path, 'hashdump_python.txt')}") + else: + 
print("::warning::No python hashes to dump into file!") + if ciTester.hashes_dict_rust: + ciTester.dumpIntoFile( + ciTester.hashes_dict_rust, "hashdump_rust.txt", generate_path + ) + os.system(f"cat {os.path.join(generate_path, 'hashdump_rust.txt')}") + else: + print("::warning::No rust hashes to dump into file!") + + ##############COMPARISON AGAINST REFERENCE ############### + if compare_path: + dumps = ["hashdump_python.txt", "hashdump_rust.txt"] + for hashdump in dumps: + if not ciTester.compareAgainstRef(hashdump, compare_path): + raise SystemError("Compare failed, see the logs") + + ##############Compare vlsvreader and vlsvrs hashes############### + ciTester.compareReaders(nonraw_to_raw_map)