@@ -5879,6 +5879,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
58795879 to_datetime : Convert argument to datetime.
58805880 to_timedelta : Convert argument to timedelta.
58815881 to_numeric : Convert argument to numeric type.
5882+ convert_dtypes : Convert argument to best possible dtype.
58825883
58835884 Examples
58845885 --------
@@ -5907,6 +5908,142 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
59075908 )
59085909 ).__finalize__ (self )
59095910
def convert_dtypes(
    self: FrameOrSeries,
    infer_objects: bool_t = True,
    convert_string: bool_t = True,
    convert_integer: bool_t = True,
    convert_boolean: bool_t = True,
) -> FrameOrSeries:
    """
    Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    infer_objects : bool, default True
        Whether object dtypes should be converted to the best possible types.
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, default True
        Whether object dtypes should be converted to ``BooleanDtype()``.

    Returns
    -------
    Series or DataFrame
        Copy of input object with new dtype.

    See Also
    --------
    infer_objects : Infer dtypes of objects.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    to_numeric : Convert argument to a numeric type.

    Notes
    -----
    By default, ``convert_dtypes`` will attempt to convert a Series (or each
    Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
    ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is
    possible to turn off individual conversions to ``StringDtype``, the integer
    extension types or ``BooleanDtype``, respectively.

    For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
    rules as during normal Series/DataFrame construction.  Then, if possible,
    convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension
    type, otherwise leave as ``object``.

    If the dtype is integer, convert to an appropriate integer extension type.

    If the dtype is numeric, and consists of all integers, convert to an
    appropriate integer extension type.

    In the future, as new dtypes are added that support ``pd.NA``, the results
    of this method will change to support those new dtypes.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
    ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
    ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
    ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
    ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
    ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
    ...     }
    ... )

    Start with a DataFrame with default dtypes.

    >>> df
       a  b      c    d     e      f
    0  1  x   True    h  10.0    NaN
    1  2  y  False    i   NaN  100.5
    2  3  z    NaN  NaN  20.0  200.0

    >>> df.dtypes
    a      int32
    b     object
    c     object
    d     object
    e    float64
    f    float64
    dtype: object

    Convert the DataFrame to use best possible dtypes.

    >>> dfn = df.convert_dtypes()
    >>> dfn
       a  b      c     d     e      f
    0  1  x   True     h    10    NaN
    1  2  y  False     i  <NA>  100.5
    2  3  z   <NA>  <NA>    20  200.0

    >>> dfn.dtypes
    a      Int32
    b     string
    c    boolean
    d     string
    e      Int64
    f    float64
    dtype: object

    Start with a Series of strings and missing data represented by ``np.nan``.

    >>> s = pd.Series(["a", "b", np.nan])
    >>> s
    0      a
    1      b
    2    NaN
    dtype: object

    Obtain a Series with dtype ``StringDtype``.

    >>> s.convert_dtypes()
    0       a
    1       b
    2    <NA>
    dtype: string
    """
    # One-dimensional objects (Series) delegate straight to the per-Series
    # implementation.
    if self.ndim == 1:
        return self._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )

    # Otherwise convert column by column. ``items()`` is used (rather than
    # label-based lookup) so each column is visited exactly once; the label
    # itself is not needed here.
    results = [
        col._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )
        for _, col in self.items()
    ]

    # ``pd.concat`` raises ValueError when given no objects, so a frame
    # without any columns is returned as a plain copy, which still honours
    # the documented "copy of input object" contract.
    if not results:
        return self.copy()

    return pd.concat(results, axis=1, copy=False)
6046+
59106047 # ----------------------------------------------------------------------
59116048 # Filling NA's
59126049
0 commit comments