From 73eaf7d29c42a27027281ab2c299068e088badd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Phan=20T=E1=BA=A5t=20B=C3=A1ch?= Date: Tue, 24 Sep 2024 12:15:27 +0200 Subject: [PATCH 1/2] Update mxcalc.py Adding _ to np.bool to make sure it works with numpy 1.26.4 on Google Colab --- nephosem/specutils/mxcalc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nephosem/specutils/mxcalc.py b/nephosem/specutils/mxcalc.py index 5f52e36..0c41b42 100644 --- a/nephosem/specutils/mxcalc.py +++ b/nephosem/specutils/mxcalc.py @@ -506,7 +506,9 @@ def compute_token_weights(tcPositionMTX, twMTX, booleanize = True, tokenFormat=' missing_types = [] tokens = tcPositionMTX.row_items types = set(twMTX.row_items) # set of target types - tcmx_type = np.bool if booleanize else tcPositionMTX.matrix.dtype + # tcmx_type = np.bool if booleanize else tcPositionMTX.matrix.dtype + # Adding _ to make sure it works with numpy 1.26.4 in Google Colab + tcmx_type = np.bool_ if booleanize else tcPositionMTX.matrix.dtype bool_tcmx = tcPositionMTX.matrix.astype(tcmx_type, copy=True).toarray() twmx = twMTX.matrix.toarray() # transform to dense matrix (numpy.ndarray) resmx = np.zeros(bool_tcmx.shape) From ad4c2fe47772355f0e71f26bb4730c699b2eb770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Phan=20T=E1=BA=A5t=20B=C3=A1ch?= Date: Wed, 25 Sep 2024 17:44:12 +0200 Subject: [PATCH 2/2] Pandas sparse matrix Update the code to make it work with recent pandas. --- nephosem/core/matrix.py | 5 +- .../data/StanfDepSents.templates.graphml.xml | 342 ++++++++++++++++++ 2 files changed, 346 insertions(+), 1 deletion(-) create mode 100644 nephosem/tests/data/StanfDepSents.templates.graphml.xml diff --git a/nephosem/core/matrix.py b/nephosem/core/matrix.py index 9ee1bf9..72ae70b 100644 --- a/nephosem/core/matrix.py +++ b/nephosem/core/matrix.py @@ -152,7 +152,10 @@ def __init__(self, matrix): @property def dataframe(self): - return pd.SparseDataFrame(self.matrix).to_dense() + # return pd.SparseDataFrame(self.matrix).to_dense() + # The code is outdated. Use the following instead. + return pd.DataFrame.sparse.from_spmatrix(self.matrix).sparse.to_dense() + def __getitem__(self, arg): return self._get_value(arg) diff --git a/nephosem/tests/data/StanfDepSents.templates.graphml.xml b/nephosem/tests/data/StanfDepSents.templates.graphml.xml new file mode 100644 index 0000000..57508c3 --- /dev/null +++ b/nephosem/tests/data/StanfDepSents.templates.graphml.xml @@ -0,0 +1,342 @@ + + + + (\w+) + + + \w+ + + + + + (V)\w* + + + (N)\w* + + + (nsubj)$ + + + + + \w+ + (V)\w* + + + (N)\w* + + + (JJ) + + + (nsubj)$ + + + (acomp) + + + + + (V)\w* + + + (N)\w* + + + (JJ) + + + (nsubj)$ + + + (acomp) + + + + + (V)\w* + + + (N)\w* + + + (IN) + + + (N)\w* + + + (nsubj) + + + (prep) + + + (pobj) + + + + + (V)\w* + + + (N)\w* + + + (IN) + + + (nsubj) + + + (prep) + + + + + (V)\w* + + + (IN) + + + (N)\w* + + + (prep) + + + (pobj) + + + + + (V)\w* + + + (N)\w* + + + (N)\w* + + + (nsubj)$ + + + (dobj) + + + + + (V)\w* + + + (N)\w* + + + (dobj) + + + + + (\w+) + (V)\w* + + + + (\w+) + (N)\w+ + + + (\w+) + (N)\w+ + + + (nsubj)$ + + + (dobj) + + + + + (N)\w* + + + (JJ) + + + (amod) + + + + + (JJ) + + + (RB)\w* + + + (advmod) + + + + + (V)\w* + + + (N)\w* + + + (nsubjpass) + + + + + (V)\w* + + + by + IN + + + (N)\w* + + + (N)\w* + + + (agent) + + + pobj + + + (nsubjpass) + + + + + (V)\w* + + + by + IN + + + (N)\w* + + + (agent) + + + pobj + + + + + (N)\w* + + + (N)\w* + + + (appos) + + + + + (V)\w* + + + \w+ + (N)\w* + + + (N)\w* + + + (nsubj) + + + (conj) + + + + + \w+ + (V)\w* + + + (N)\w* + + + (V)\w* + + + (nsubj) + + + (conj) + + + + + (V)\w* + + + \w+ + \w* + + + \w+ + \w* + + + (dobj) + + + (dative) + + + + + (V)\w* + + + (N)\w* + + + \w+ + \w* + + + (dobj) + + + (dative) + + + + + (V)\w* + + + (N)\w* + + + \w+ + \w* + + + (dative) + + + (dobj) + + + \ No newline at end of file