Hi,
In all the example notebooks, the cell containing the code `out = unique_smiles(matches)` fails with a TypeError, so it is impossible to produce the output.
The error is the following:
TypeError Traceback (most recent call last)
Cell In[8], line 1
----> 1 out = unique_smiles(matches)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pyMolNetEnhancer/molnetenhancer.py:157, in unique_smiles(matches)
155 matches[index] = matches[index].rename(columns = {'Scan':'cluster.index'})
156 if '#Scan#' in matches[index].columns:
--> 157 matches[index] = matches[index].groupby('#Scan#', as_index=False).agg(lambda x: ','.join(set(x.dropna())))
158 matches[index] = matches[index].rename(columns = {'#Scan#':'cluster.index'})
160 comb = reduce(lambda left,right: pd.merge(left,right,on='cluster.index', how = "outer"), matches)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:1495, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
1493 gba = GroupByApply(self, [func], args=(), kwargs={})
1494 try:
-> 1495 result = gba.agg()
1497 except ValueError as err:
1498 if "No objects to concatenate" not in str(err):
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:178, in Apply.agg(self)
175 return self.agg_dict_like()
176 elif is_list_like(func):
177 # we require a list, but not a 'str'
--> 178 return self.agg_list_like()
180 if callable(func):
181 f = com.get_cython_func(func)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:311, in Apply.agg_list_like(self)
303 def agg_list_like(self) -> DataFrame | Series:
304 """
305 Compute aggregation in the case of a list-like argument.
306
(...)
309 Result of aggregation.
310 """
--> 311 return self.agg_or_apply_list_like(op_name="agg")
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:1353, in GroupByApply.agg_or_apply_list_like(self, op_name)
1348 # Only set as_index=True on groupby objects, not Window or Resample
1349 # that inherit from this class.
1350 with com.temp_setattr(
1351 obj, "as_index", True, condition=hasattr(obj, "as_index")
1352 ):
-> 1353 keys, results = self.compute_list_like(op_name, selected_obj, kwargs)
1354 result = self.wrap_results_list_like(keys, results)
1355 return result
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:370, in Apply.compute_list_like(self, op_name, selected_obj, kwargs)
364 colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
365 args = (
366 [self.axis, *self.args]
367 if include_axis(op_name, colg)
368 else self.args
369 )
--> 370 new_res = getattr(colg, op_name)(func, *args, **kwargs)
371 results.append(new_res)
372 indices.append(index)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:255, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
253 kwargs["engine"] = engine
254 kwargs["engine_kwargs"] = engine_kwargs
--> 255 ret = self._aggregate_multiple_funcs(func, *args, **kwargs)
256 if relabeling:
257 # columns is not narrowed by mypy from relabeling flag
258 assert columns is not None # for mypy
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:360, in SeriesGroupBy._aggregate_multiple_funcs(self, arg, *args, **kwargs)
358 for idx, (name, func) in enumerate(arg):
359 key = base.OutputKey(label=name, position=idx)
--> 360 results[key] = self.aggregate(func, *args, **kwargs)
362 if any(isinstance(x, DataFrame) for x in results.values()):
363 from pandas import concat
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:292, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
289 return self._python_agg_general(func, *args, **kwargs)
291 try:
--> 292 return self._python_agg_general(func, *args, **kwargs)
293 except KeyError:
294 # KeyError raised in test_groupby.test_basic is bc the func does
295 # a dictionary lookup on group.name, but group name is not
296 # pinned in _python_agg_general, only in _aggregate_named
297 result = self._aggregate_named(func, *args, **kwargs)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:325, in SeriesGroupBy._python_agg_general(self, func, *args, **kwargs)
322 f = lambda x: func(x, *args, **kwargs)
324 obj = self._obj_with_exclusions
--> 325 result = self.grouper.agg_series(obj, f)
326 res = obj._constructor(result, name=obj.name)
327 return self._wrap_aggregated_output(res)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/ops.py:849, in BaseGrouper.agg_series(self, obj, func, preserve_dtype)
842 if not isinstance(obj._values, np.ndarray):
843 # we can preserve a little bit more aggressively with EA dtype
844 # because maybe_cast_pointwise_result will do a try/except
845 # with _from_sequence. NB we are assuming here that _from_sequence
846 # is sufficiently strict that it casts appropriately.
847 preserve_dtype = True
--> 849 result = self._aggregate_series_pure_python(obj, func)
851 if len(obj) == 0 and len(result) == 0 and isinstance(obj.dtype, ExtensionDtype):
852 cls = obj.dtype.construct_array_type()
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/ops.py:877, in BaseGrouper._aggregate_series_pure_python(self, obj, func)
874 splitter = self._get_splitter(obj, axis=0)
876 for i, group in enumerate(splitter):
--> 877 res = func(group)
878 res = extract_result(res)
880 if not initialized:
881 # We only do this validation on the first iteration
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:322, in SeriesGroupBy._python_agg_general.<locals>.<lambda>(x)
320 alias = com._builtin_table_alias[func]
321 warn_alias_replacement(self, orig_func, alias)
--> 322 f = lambda x: func(x, *args, **kwargs)
324 obj = self._obj_with_exclusions
325 result = self.grouper.agg_series(obj, f)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pyMolNetEnhancer/molnetenhancer.py:157, in unique_smiles.<locals>.<lambda>(x)
155 matches[index] = matches[index].rename(columns = {'Scan':'cluster.index'})
156 if '#Scan#' in matches[index].columns:
--> 157 matches[index] = matches[index].groupby('#Scan#', as_index=False).agg(lambda x: ','.join(set(x.dropna())))
158 matches[index] = matches[index].rename(columns = {'#Scan#':'cluster.index'})
160 comb = reduce(lambda left,right: pd.merge(left,right,on='cluster.index', how = "outer"), matches)
TypeError: sequence item 0: expected str instance, int found
@madeleineernst can you solve this? Thanks
Hi,
In all the example notebooks, the cell containing the code `out = unique_smiles(matches)` fails with a TypeError, so it is impossible to produce the output.
The error is the following:
TypeError Traceback (most recent call last)
Cell In[8], line 1
----> 1 out = unique_smiles(matches)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pyMolNetEnhancer/molnetenhancer.py:157, in unique_smiles(matches)
155 matches[index] = matches[index].rename(columns = {'Scan':'cluster.index'})
156 if '#Scan#' in matches[index].columns:
--> 157 matches[index] = matches[index].groupby('#Scan#', as_index=False).agg(lambda x: ','.join(set(x.dropna())))
158 matches[index] = matches[index].rename(columns = {'#Scan#':'cluster.index'})
160 comb = reduce(lambda left,right: pd.merge(left,right,on='cluster.index', how = "outer"), matches)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:1495, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
1493 gba = GroupByApply(self, [func], args=(), kwargs={})
1494 try:
-> 1495 result = gba.agg()
1497 except ValueError as err:
1498 if "No objects to concatenate" not in str(err):
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:178, in Apply.agg(self)
175 return self.agg_dict_like()
176 elif is_list_like(func):
177 # we require a list, but not a 'str'
--> 178 return self.agg_list_like()
180 if callable(func):
181 f = com.get_cython_func(func)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:311, in Apply.agg_list_like(self)
303 def agg_list_like(self) -> DataFrame | Series:
304 """
305 Compute aggregation in the case of a list-like argument.
306
(...)
309 Result of aggregation.
310 """
--> 311 return self.agg_or_apply_list_like(op_name="agg")
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:1353, in GroupByApply.agg_or_apply_list_like(self, op_name)
1348 # Only set as_index=True on groupby objects, not Window or Resample
1349 # that inherit from this class.
1350 with com.temp_setattr(
1351 obj, "as_index", True, condition=hasattr(obj, "as_index")
1352 ):
-> 1353 keys, results = self.compute_list_like(op_name, selected_obj, kwargs)
1354 result = self.wrap_results_list_like(keys, results)
1355 return result
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/apply.py:370, in Apply.compute_list_like(self, op_name, selected_obj, kwargs)
364 colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
365 args = (
366 [self.axis, *self.args]
367 if include_axis(op_name, colg)
368 else self.args
369 )
--> 370 new_res = getattr(colg, op_name)(func, *args, **kwargs)
371 results.append(new_res)
372 indices.append(index)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:255, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
253 kwargs["engine"] = engine
254 kwargs["engine_kwargs"] = engine_kwargs
--> 255 ret = self._aggregate_multiple_funcs(func, *args, **kwargs)
256 if relabeling:
257 # columns is not narrowed by mypy from relabeling flag
258 assert columns is not None # for mypy
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:360, in SeriesGroupBy._aggregate_multiple_funcs(self, arg, *args, **kwargs)
358 for idx, (name, func) in enumerate(arg):
359 key = base.OutputKey(label=name, position=idx)
--> 360 results[key] = self.aggregate(func, *args, **kwargs)
362 if any(isinstance(x, DataFrame) for x in results.values()):
363 from pandas import concat
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:292, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
289 return self._python_agg_general(func, *args, **kwargs)
291 try:
--> 292 return self._python_agg_general(func, *args, **kwargs)
293 except KeyError:
294 # KeyError raised in test_groupby.test_basic is bc the func does
295 # a dictionary lookup on group.name, but group name is not
296 # pinned in _python_agg_general, only in _aggregate_named
297 result = self._aggregate_named(func, *args, **kwargs)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:325, in SeriesGroupBy._python_agg_general(self, func, *args, **kwargs)
322 f = lambda x: func(x, *args, **kwargs)
324 obj = self._obj_with_exclusions
--> 325 result = self.grouper.agg_series(obj, f)
326 res = obj._constructor(result, name=obj.name)
327 return self._wrap_aggregated_output(res)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/ops.py:849, in BaseGrouper.agg_series(self, obj, func, preserve_dtype)
842 if not isinstance(obj._values, np.ndarray):
843 # we can preserve a little bit more aggressively with EA dtype
844 # because maybe_cast_pointwise_result will do a try/except
845 # with _from_sequence. NB we are assuming here that _from_sequence
846 # is sufficiently strict that it casts appropriately.
847 preserve_dtype = True
--> 849 result = self._aggregate_series_pure_python(obj, func)
851 if len(obj) == 0 and len(result) == 0 and isinstance(obj.dtype, ExtensionDtype):
852 cls = obj.dtype.construct_array_type()
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/ops.py:877, in BaseGrouper._aggregate_series_pure_python(self, obj, func)
874 splitter = self._get_splitter(obj, axis=0)
876 for i, group in enumerate(splitter):
--> 877 res = func(group)
878 res = extract_result(res)
880 if not initialized:
881 # We only do this validation on the first iteration
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pandas/core/groupby/generic.py:322, in SeriesGroupBy._python_agg_general.<locals>.<lambda>(x)
320 alias = com._builtin_table_alias[func]
321 warn_alias_replacement(self, orig_func, alias)
--> 322 f = lambda x: func(x, *args, **kwargs)
324 obj = self._obj_with_exclusions
325 result = self.grouper.agg_series(obj, f)
File ~/mambaforge/envs/molnetenhancer/lib/python3.12/site-packages/pyMolNetEnhancer/molnetenhancer.py:157, in unique_smiles.<locals>.<lambda>(x)
155 matches[index] = matches[index].rename(columns = {'Scan':'cluster.index'})
156 if '#Scan#' in matches[index].columns:
--> 157 matches[index] = matches[index].groupby('#Scan#', as_index=False).agg(lambda x: ','.join(set(x.dropna())))
158 matches[index] = matches[index].rename(columns = {'#Scan#':'cluster.index'})
160 comb = reduce(lambda left,right: pd.merge(left,right,on='cluster.index', how = "outer"), matches)
TypeError: sequence item 0: expected str instance, int found
@madeleineernst can you solve this? Thanks