diff --git a/analysator/pyVlsv/vlsvreader.py b/analysator/pyVlsv/vlsvreader.py index ac0caf88b..5c2b93853 100644 --- a/analysator/pyVlsv/vlsvreader.py +++ b/analysator/pyVlsv/vlsvreader.py @@ -1,4 +1,3 @@ -#s # This file is part of Analysator. # Copyright 2013-2016 Finnish Meteorological Institute # Copyright 2017-2024 University of Helsinki @@ -852,18 +851,10 @@ def check_population( self, popname ): blockidsexist = False foundpop = False for child in self.__xml_root: - if child.tag == "BLOCKIDS": + if child.tag == "BLOCKVARIABLE": if "name" in child.attrib: - if popname.lower() == child.attrib["name"].lower(): + if popname.lower() == child.attrib["name"].lower(): # avgs foundpop = True - else: - blockidsexist = True - if blockidsexist: - for child in self.__xml_root: - if child.tag == "BLOCKVARIABLE": - if "name" in child.attrib: - if popname.lower() == child.attrib["name"].lower(): # avgs - foundpop = True return foundpop def get_all_variables( self ): @@ -3365,19 +3356,7 @@ def read_velocity_distribution_dense(self, cellid, pop="proton", regularize=True if setThreshold is None: # Drop all velocity cells which are below the sparsity threshold. Otherwise the plot will show buffer # cells as well. - if self.check_variable('MinValue') == True: # Sparsity threshold used to be saved as MinValue - setThreshold = self.read_variable('MinValue',cellid) - logging.info("Found a vlsv file MinValue of "+str(setThreshold)) - elif self.check_variable(pop+"/EffectiveSparsityThreshold") == True: - setThreshold = self.read_variable(pop+"/EffectiveSparsityThreshold",cellid) - logging.info("Found a vlsv file value "+pop+"/EffectiveSparsityThreshold"+" of "+str(setThreshold)) - elif self.check_variable(pop+"/vg_effectivesparsitythreshold") == True: - setThreshold = self.read_variable(pop+"/vg_effectivesparsitythreshold",cellid) - logging.info("Found a vlsv file value "+pop+"/vg_effectivesparsitythreshold"+" of "+str(setThreshold)) - else: - logging.warning("Unable to find a MinValue or EffectiveSparsityThreshold value from the .vlsv file.") - logging.info("Using a default value of 1.e-16. Override with setThreshold=value.") - setThreshold = 1.e-16 + setThreshold = self.get_sparsity_for_cid(cellid,pop) ii_f = np.where(velocity_cell_values >= setThreshold) logging.info("Dropping velocity cells under setThreshold value "+str(setThreshold)) @@ -3409,6 +3388,64 @@ def read_velocity_distribution_dense(self, cellid, pop="proton", regularize=True return da, edges + def read_compression(self): + '''' + 0 => "MLP", + 1 => "MLPMULTI", + 2 => "ZFP", + 3 => "OCTREE", + _ => "None", + ''' + try: + compression=self.read_parameter("COMPRESSION") + except: + compression = 4 + return compression + + def get_sparsity_for_cid(self,cellid,pop): + if self.check_variable('MinValue') == True: + val = self.read_variable('MinValue',cellid) + logging.info("Found a vlsv file MinValue of "+str(val)) + elif self.check_variable(pop+"/EffectiveSparsityThreshold") == True: + val = self.read_variable(pop+"/EffectiveSparsityThreshold",cellid) + logging.info("Found a vlsv file value "+pop+"/EffectiveSparsityThreshold"+" of "+str(val)) + elif self.check_variable(pop+"/vg_effectivesparsitythreshold") == True: + val = self.read_variable(pop+"/vg_effectivesparsitythreshold",cellid) + logging.info("Found a vlsv file value "+pop+"/vg_effectivesparsitythreshold"+" of "+str(val)) + else: + logging.warning("Unable to find a MinValue or EffectiveSparsityThreshold value from the .vlsv file.") + logging.info("Using a default value of 1.e-16. Override with val=value.") + val = 1.e-16 + return val + + + # Taken from https://github.com/kstppd/vlsvrs/blob/e07a7039d49e824e4644f410038bddec6463e579/src/vlsv_reader.rs#L1350 + def parse_octree_state(self, octree_bytes): + import struct + offset = 0 + buffer_len = len(octree_bytes) + def read_struct(fmt, size): + nonlocal offset + if offset + size > buffer_len: + raise ValueError("Overflow") + val = struct.unpack_from(f"<{fmt}", octree_bytes, offset) + offset += size + return val + (n_ignored_blocks,) = read_struct("Q", 8) + blocks_to_ignore = [] + if n_ignored_blocks > 0: + fmt_str = f"{n_ignored_blocks}I" + size_needed = n_ignored_blocks * 4 + blocks_to_ignore = list(read_struct(fmt_str, size_needed)) + bbox_shape = list(read_struct("3Q", 24)) + bbox_lims = list(read_struct("6d", 48)) + return { + "blocks_to_ignore": blocks_to_ignore, + "bbox_shape": bbox_shape, + "bbox_lims": bbox_lims, + "read_index": offset, + } + def read_velocity_cells(self, cellid, pop="proton"): ''' Read velocity cells from a spatial cell @@ -3472,20 +3509,142 @@ def read_velocity_cells(self, cellid, pop="proton"): for child in self.__xml_root: # Read in avgs if "name" in child.attrib and (child.attrib["name"] == pop) and (child.tag == "BLOCKVARIABLE"): - vector_size = ast.literal_eval(child.attrib["vectorsize"]) - #array_size = ast.literal_eval(child.attrib["arraysize"]) - element_size = ast.literal_eval(child.attrib["datasize"]) - datatype = child.attrib["datatype"] - - # Navigate to the correct position - offset_avgs = int(offset * vector_size * element_size + ast.literal_eval(child.text)) - fptr.seek(offset_avgs) - - if datatype == "float" and element_size == 4: - data_avgs = np.fromfile(fptr, dtype = np.float32, count = vector_size*num_of_blocks) - if datatype == "float" and element_size == 8: - data_avgs = np.fromfile(fptr, dtype = np.float64, count = vector_size*num_of_blocks) - data_avgs = data_avgs.reshape(num_of_blocks, vector_size) + compression_type = self.read_compression() + if compression_type == 0: # MLP + assert False, "Compression method MLP not yet implemented!" + + elif compression_type == 1: # MLPMULTI + assert False, "Compression method MLPMULTI not yet implemented!" + + elif compression_type == 2: # ZFP + import zfpy + + vdf_byte_size = int(self.read_parameter("VDF_BYTE_SIZE")) + bpc = self.read(mesh="SpatialGrid", tag="BYTESPERCELL", name=pop) + amount = bpc[cells_with_blocks_index] + loc = ast.literal_eval(child.text) + np.sum(bpc[0:cells_with_blocks_index]) + fptr.seek(loc) + compressed_data = np.fromfile(fptr, dtype=np.ubyte, count=amount) + vector_size = np.pow(self.get_WID(), 3) + + if vdf_byte_size == 4: + data_avgs = zfpy._decompress( + compressed_data, + zfpy.type_float, + [ + num_of_blocks * vector_size, + ], + out=None, + tolerance=self.get_sparsity_for_cid(cellid, pop), + rate=-1, + precision=-1, + ) + else: + data_avgs = zfpy._decompress( + compressed_data, + zfpy.type_double, + [ + num_of_blocks * vector_size, + ], + out=None, + tolerance=self.get_sparsity_for_cid(cellid, pop), + rate=-1, + precision=-1, + ) + + elif compression_type == 3: # OCTREE + import ctypes + from ctypes import POINTER, c_float, c_ubyte, c_size_t, c_uint64 + + lib = ctypes.CDLL("libtoctree_compressor.so") + lib.uncompress_with_toctree_method.argtypes = [ + POINTER(c_float), + c_size_t, + c_size_t, + c_size_t, + POINTER(c_ubyte), + c_uint64, + ] + vdf_byte_size = int(self.read_parameter("VDF_BYTE_SIZE")) + bpc = self.read(mesh="SpatialGrid", tag="BYTESPERCELL", name=pop) + amount = bpc[cells_with_blocks_index] + loc = ast.literal_eval(child.text) + np.sum(bpc[0:cells_with_blocks_index]) + + fptr.seek(loc) + octree_bytes = np.fromfile(fptr, dtype=np.uint8, count=amount) + octree_state = self.parse_octree_state(octree_bytes) + read_index = octree_state["read_index"] + nx, ny, nz = octree_state["bbox_shape"] + + octree_core = octree_bytes[read_index:].copy() + core_ptr = octree_core.ctypes.data_as(POINTER(c_ubyte)) + core_len = octree_core.nbytes + + cropped_vdf = np.zeros((nx, ny, nz), dtype=np.float32) + output_ptr = cropped_vdf.ctypes.data_as(POINTER(c_float)) + + lib.uncompress_with_toctree_method(output_ptr, nx, ny, nz, core_ptr, core_len) + + nx, ny, nz = cropped_vdf.shape + dv = self.get_velocity_mesh_dv(pop) + WID = self.get_WID() + WID2 = WID * WID + WID3 = WID2 * WID + Nb = self.get_velocity_mesh_size(pop) + Nb_x, Nb_y, Nb_z = int(Nb[0]), int(Nb[1]), int(Nb[2]) + global_shape = self.get_velocity_mesh_size(pop) * self.get_WID() + g_Nx, g_Ny, g_Nz = int(global_shape[0]), int(global_shape[1]), int(global_shape[2]) + cropped_vx_min, cropped_vy_min, cropped_vz_min, _, _, _ = octree_state["bbox_lims"] + global_vx_min, global_vy_min, global_vz_min, _, _, _ = ( + self.get_velocity_mesh_extent(pop) + ) + offset_i = int(round((cropped_vx_min - global_vx_min) / dv[0])) + offset_j = int(round((cropped_vy_min - global_vy_min) / dv[1])) + offset_k = int(round((cropped_vz_min - global_vz_min) / dv[2])) + + velocity_cells = {} + for k in range(nz): + for j in range(ny): + for i in range(nx): + gi = i + offset_i + gj = j + offset_j + gk = k + offset_k + if not (0 <= gi < g_Nx and 0 <= gj < g_Ny and 0 <= gk < g_Nz): + continue + val = cropped_vdf[i, j, k] + if val < self.get_sparsity_for_cid(cellid, pop): + continue + bi = gi // WID + bj = gj // WID + bk = gk // WID + li = gi % WID + lj = gj % WID + lk = gk % WID + local_id = lk * WID2 + lj * WID + li + velocity_block_id = bk * (Nb_y * Nb_x) + bj * Nb_x + bi + global_id = local_id + WID3 * velocity_block_id + velocity_cells[int(global_id)] = float(val) + return velocity_cells # we rrturn early here since we have already constructed the velocity_cells thingy + + elif compression_type == 4: # No asterix compression used here + vector_size = ast.literal_eval(child.attrib["vectorsize"]) + element_size = ast.literal_eval(child.attrib["datasize"]) + datatype = child.attrib["datatype"] + offset_avgs = int( + offset * vector_size * element_size + ast.literal_eval(child.text) + ) + fptr.seek(offset_avgs) + + if datatype == "float" and element_size == 4: + data_avgs = np.fromfile( + fptr, dtype=np.float32, count=vector_size * num_of_blocks + ) + elif datatype == "float" and element_size == 8: + data_avgs = np.fromfile( + fptr, dtype=np.float64, count=vector_size * num_of_blocks + ) + + data_avgs = data_avgs.reshape(num_of_blocks, vector_size) # Read in block coordinates: if ("name" in child.attrib) and (child.attrib["name"] == pop) and (child.tag == "BLOCKIDS"): vector_size = ast.literal_eval(child.attrib["vectorsize"]) diff --git a/pyproject.toml b/pyproject.toml index 5da7dfc52..9fc739bd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "matplotlib", "packaging", "scikit-image", + "zfpy", ] [project.optional-dependencies] none = [