1212import numpy .random as nrandom
1313import pandas
1414from pandas .testing import assert_frame_equal
15- from pandas .io .json import json_normalize
15+ try :
16+ from pandas import json_normalize
17+ except ImportError :
18+ from pandas .io .json import json_normalize
1619from .dataframe_split import sklearn_train_test_split , sklearn_train_test_split_streaming
1720from .dataframe_io_helpers import enumerate_json_items , JsonIterator2Stream
1821
@@ -609,6 +612,22 @@ def reservoir_iterate(sdf, indices, chunksize):
609612 return StreamingDataFrame (
610613 lambda : reservoir_iterate (sdf = self , indices = indices , chunksize = 1000 ))
611614
def drop(self, labels=None, *, axis=0, index=None, columns=None, level=None,
         inplace=False, errors='raise') -> 'StreamingDataFrame':
    """
    Applies :epkg:`pandas:DataFrame:drop` on every chunk.
    This function returns a @see cl StreamingDataFrame.

    :param labels: see :epkg:`pandas:DataFrame:drop`
    :param axis: see :epkg:`pandas:DataFrame:drop`, ``axis=0`` is only
        accepted when *columns* is the sole selector (*labels* and
        *index* left to None), in that case the value of *axis*
        is ignored by pandas
    :param index: see :epkg:`pandas:DataFrame:drop`
    :param columns: see :epkg:`pandas:DataFrame:drop`
    :param level: see :epkg:`pandas:DataFrame:drop`
    :param inplace: must be False, chunks are never modified inplace
    :param errors: see :epkg:`pandas:DataFrame:drop`
    :return: @see cl StreamingDataFrame
    :raises NotImplementedError: if *axis* is 0 and the call is not a
        pure ``columns=...`` call, or if *inplace* is True
    """
    # ``sdf.drop(columns=[...])`` keeps the default axis=0 but only drops
    # columns, so it must not be rejected by the axis check below.
    columns_only = labels is None and index is None and columns is not None
    if axis == 0 and not columns_only:
        raise NotImplementedError(f"drop is not implemented for axis={axis}.")
    if inplace:
        raise NotImplementedError(f"drop is not implemented for inplace={inplace}.")

    def drop_chunk(df):
        # inplace is forced to False so that every chunk yields a new dataframe.
        return df.drop(
            labels, axis=axis, index=index, columns=columns,
            level=level, inplace=False, errors=errors)

    # The lambda delays the iteration over *self*: nothing is computed
    # until the returned streaming dataframe is iterated.
    return StreamingDataFrame(
        lambda: map(drop_chunk, self), **self.get_kwargs())
630+
612631 def apply (self , * args , ** kwargs ) -> 'StreamingDataFrame' :
613632 """
614633 Applies :epkg:`pandas:DataFrame:apply`.
@@ -1078,8 +1097,7 @@ def iterate_na(self, **kwargs):
10781097 return StreamingDataFrame (
10791098 lambda : iterate_na (self , ** kwargs ), ** self .get_kwargs ())
10801099
1081- def describe (self , percentiles = None , include = None , exclude = None ,
1082- datetime_is_numeric = False ):
1100+ def describe (self , percentiles = None , include = None , exclude = None ):
10831101 """
10841102 Calls :epkg:`pandas:DataFrame:describe` on every piece
10851103 of the datasets. *percentiles* are not really accurate
@@ -1088,16 +1106,19 @@ def describe(self, percentiles=None, include=None, exclude=None,
10881106 :param percentiles: see :epkg:`pandas:DataFrame:describe`
10891107 :param include: see :epkg:`pandas:DataFrame:describe`
10901108 :param exclude: see :epkg:`pandas:DataFrame:describe`
1091- :param datetime_is_numeric: see :epkg:`pandas:DataFrame:describe`
10921109 :return: :epkg:`pandas:DataFrame:describe`
1110+
1111+ .. versionchanged:: 0.3.219
1112+
1113+ Parameter *datetime_is_numeric* was removed
1114+ (see :epkg:`pandas:DataFrame:describe`).
10931115 """
10941116 merged = None
10951117 stack = []
10961118 notper = ['count' , 'mean' , 'std' ]
10971119 for df in self :
10981120 desc = df .describe (
1099- percentiles = percentiles , include = include , exclude = exclude ,
1100- datetime_is_numeric = datetime_is_numeric )
1121+ percentiles = percentiles , include = include , exclude = exclude )
11011122 count = desc .loc ['count' , :]
11021123 rows = [name for name in desc .index if name not in notper ]
11031124 stack .append (desc .loc [rows , :])
@@ -1120,8 +1141,7 @@ def describe(self, percentiles=None, include=None, exclude=None,
11201141 merged .loc ['std' , :] / merged .loc ['count' , :] -
11211142 merged .loc ['mean' , :] ** 2 ) ** 0.5
11221143 values = pandas .concat (stack )
1123- summary = values .describe (percentiles = percentiles ,
1124- datetime_is_numeric = datetime_is_numeric )
1144+ summary = values .describe (percentiles = percentiles )
11251145 merged = merged .loc [notper , :]
11261146 rows = [name for name in summary .index if name not in notper ]
11271147 summary = summary .loc [rows , :]
0 commit comments