diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 230ebc3c10fad..9a6b531c6da5e 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -290,7 +290,7 @@ def box_expected(expected, box_cls, transpose: bool = True): else: expected = pd.array(expected, copy=False) elif box_cls is Index: - expected = Index(expected) + expected = Index(expected, copy=False) elif box_cls is Series: expected = Series(expected) elif box_cls is DataFrame: diff --git a/pandas/conftest.py b/pandas/conftest.py index 74c79c7025ec6..1e1d84e08dcd9 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -769,7 +769,7 @@ def index_with_missing(request): vals = ind.values.copy() vals[0] = None vals[-1] = None - return type(ind)(vals) + return type(ind)(vals, copy=False) # ---------------------------------------------------------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 55a3022454e02..a6d14d6fbd152 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -926,7 +926,7 @@ def value_counts_internal( # Starting in 3.0, we no longer perform dtype inference on the # Index object we construct here, xref GH#56161 - idx = Index(keys, dtype=keys.dtype, name=index_name) + idx = Index(keys, dtype=keys.dtype, name=index_name, copy=False) if ( not sort diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f719fa4e4c839..3f218b3813149 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1972,7 +1972,7 @@ def relabel_result( fun = [ com.get_callable_name(f) if not isinstance(f, str) else f for f in fun ] - col_idx_order = Index(s.index).get_indexer(fun) + col_idx_order = Index(s.index, copy=False).get_indexer(fun) valid_idx = col_idx_order != -1 if valid_idx.any(): s = s.iloc[col_idx_order[valid_idx]] diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index ff01d4ac835ba..e4f4a6d242003 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -496,7 +496,7 @@ def value_counts(self, dropna: bool = True) -> Series: result = value_counts(values, sort=False, dropna=dropna) index_arr = self._from_backing_data(np.asarray(result.index._data)) - index = Index(index_arr, name=result.index.name) + index = Index(index_arr, name=result.index.name, copy=False) return Series(result._values, index=index, name=result.name, copy=False) def _quantile( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d2d67dd644303..0e69ba9a1b778 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1804,7 +1804,7 @@ def value_counts(self, dropna: bool = True) -> Series: counts = ArrowExtensionArray(counts) - index = Index(self._from_pyarrow_array(values)) + index = Index(self._from_pyarrow_array(values), copy=False) return Series(counts, index=index, name="count", copy=False) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3cea95c81b7f2..c24ec494a84ce 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -670,7 +670,7 @@ def _from_inferred_categories( to_timedelta, ) - cats = Index(inferred_categories) + cats = Index(inferred_categories, copy=False) known_categories = ( isinstance(dtype, CategoricalDtype) and dtype.categories is not None ) @@ -2397,7 +2397,7 @@ def _validate_listlike(self, value): from pandas import Index # tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914 - to_add = Index._with_infer(value, tupleize_cols=False).difference( + to_add = Index._with_infer(value, tupleize_cols=False, copy=False).difference( self.categories ) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9a0b29316d192..390c4d2a8bcdc 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1422,7 +1422,8 @@ def value_counts(self, dropna: bool = True) -> Series: self.dtype.construct_array_type()( keys, # type: ignore[arg-type] mask_index, - ) + ), + copy=False, ) return Series(arr, index=index, name="count", copy=False) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 86140229b724e..4b1c05fe7be0b 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -961,7 +961,7 @@ def value_counts(self, dropna: bool = True) -> Series: counts = np.insert(counts, 0, fcounts) if not isinstance(keys, ABCIndex): - index = Index(keys) + index = Index(keys, copy=False) else: index = keys return Series(counts, index=index, copy=False) diff --git a/pandas/core/base.py b/pandas/core/base.py index 200b16b4b6b1a..8fb7b6590afb1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1312,11 +1312,11 @@ def factorize( from pandas import Index try: - uniques = Index(uniques, dtype=self.dtype) + uniques = Index(uniques, dtype=self.dtype, copy=False) except NotImplementedError: # not all dtypes are supported in Index that are allowed for Series # e.g. float16 or bytes - uniques = Index(uniques) + uniques = Index(uniques, copy=False) return codes, uniques _shared_docs["searchsorted"] = """ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b4bdaefbe34b9..93e9d16228eac 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1250,7 +1250,7 @@ def _set_result_index_ordered( return result # row order is scrambled => sort the rows by position in original index - original_positions = Index(self._grouper.result_ilocs) + original_positions = Index(self._grouper.result_ilocs, copy=False) result = result.set_axis(original_positions, axis=0) result = result.sort_index(axis=0) if self._grouper.has_dropped_na: @@ -1298,7 +1298,7 @@ def _insert_inaxis_grouper( if qs is None: result.insert(0, name, lev) else: - result.insert(0, name, Index(np.repeat(lev, len(qs)))) + result.insert(0, name, Index(np.repeat(lev, len(qs)), copy=False)) return result @@ -4392,7 +4392,7 @@ def _nth( # error: No overload variant of "where" matches argument types # "Any", "NAType", "Any" values = np.where(nulls, NA, grouper) # type: ignore[call-overload] - grouper = Index(values, dtype="Int64") + grouper = Index(values, dtype="Int64", copy=False) grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) return grb.nth(n) @@ -5806,7 +5806,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde MultiIndex """ nqs = len(qs) - lev_codes, lev = Index(qs).factorize() + lev_codes, lev = Index(qs, copy=False).factorize() lev_codes = coerce_indexer_dtype(lev_codes, lev) if idx._is_multi: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index eacde2e9661a8..1f8ee634e7332 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -515,7 +515,9 @@ def __init__( # error: Cannot determine type of "grouping_vector" [has-type] ng = newgrouper.groupings[0].grouping_vector # type: ignore[has-type] # use Index instead of ndarray so we can recover the name - grouping_vector = Index(ng, name=newgrouper.result_index.name) + grouping_vector = Index( + ng, name=newgrouper.result_index.name, copy=False + ) elif not isinstance( grouping_vector, (Series, Index, ExtensionArray, np.ndarray) @@ -684,7 +686,7 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]: @cache_readonly def groups(self) -> dict[Hashable, Index]: codes, uniques = self._codes_and_uniques - uniques = Index._with_infer(uniques, name=self.name) + uniques = Index._with_infer(uniques, name=self.name, copy=False) r, counts = libalgos.groupsort_indexer(ensure_platform_int(codes), len(uniques)) counts = ensure_int64(counts).cumsum() diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index d86264cb95dc5..636f8a7a7affc 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -730,7 +730,7 @@ def groups(self) -> dict[Hashable, Index]: @cache_readonly def is_monotonic(self) -> bool: # return if my group orderings are monotonic - return Index(self.ids).is_monotonic_increasing + return Index(self.ids, copy=False).is_monotonic_increasing @final @cache_readonly @@ -760,7 +760,9 @@ def ids(self) -> npt.NDArray[np.intp]: @cache_readonly def result_index_and_ids(self) -> tuple[Index, npt.NDArray[np.intp]]: - levels = [Index._with_infer(ping.uniques) for ping in self.groupings] + levels = [ + Index._with_infer(ping.uniques, copy=False) for ping in self.groupings + ] obs = [ ping._observed or not ping._passed_categorical for ping in self.groupings ] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7ea6fc253c60b..e9eaff0e08bd2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -300,6 +300,7 @@ def _new_Index(cls, d): """ # required for backward compat, because PI can't be instantiated with # ordinals through __new__ GH #13277 + d["copy"] = False if issubclass(cls, ABCPeriodIndex): from pandas.core.indexes.period import _new_PeriodIndex @@ -692,7 +693,7 @@ def _with_infer(cls, *args, **kwargs): # "ndarray[Any, Any]" values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type] if values.dtype.kind in "iufb": - return Index(values, name=result.name) + return Index(values, name=result.name, copy=False) return result @@ -2775,7 +2776,7 @@ def fillna(self, value): # no need to care metadata other than name # because it can't have freq if it has NaTs # _with_infer needed for test_fillna_categorical - return Index._with_infer(result, name=self.name) + return Index._with_infer(result, name=self.name, copy=False) return self._view() def dropna(self, how: AnyAll = "any") -> Self: @@ -3910,8 +3911,8 @@ def _get_fill_indexer( if not (self.is_monotonic_increasing or self.is_monotonic_decreasing): raise ValueError("index must be monotonic increasing or decreasing") encoded = self.append(target)._engine.values # type: ignore[union-attr] - self_encoded = Index(encoded[: len(self)]) - target_encoded = Index(encoded[len(self) :]) + self_encoded = Index(encoded[: len(self)], copy=False) + target_encoded = Index(encoded[len(self) :], copy=False) return self_encoded._get_fill_indexer( target_encoded, method, limit, tolerance ) @@ -4338,7 +4339,7 @@ def _reindex_non_unique( new_indexer[~check] = -1 if not isinstance(self, ABCMultiIndex): - new_index = Index(new_labels, name=self.name) + new_index = Index(new_labels, name=self.name, copy=False) else: new_index = type(self).from_tuples(new_labels, names=self.names) return new_index, indexer, new_indexer @@ -4487,7 +4488,7 @@ def join( and not self.categories.equals(other.categories) ): # dtypes are "equal" but categories are in different order - other = Index(other._values.reorder_categories(self.categories)) + other = Index(other._values.reorder_categories(self.categories), copy=False) _validate_join_method(how) @@ -4930,7 +4931,9 @@ def _wrap_join_result( elif ridx is None: join_index = other else: - join_index = self._constructor._with_infer(joined, dtype=self.dtype) + join_index = self._constructor._with_infer( + joined, dtype=self.dtype, copy=False + ) names = other.names if how == "right" else self.names if join_index.names != names: @@ -6368,7 +6371,7 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]: other = type(self).from_tuples(other) # type: ignore[attr-defined] except (TypeError, ValueError): # let's instead try with a straight Index - self = Index(self._values) + self = Index(self._values, copy=False) if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): # Reverse op so we dont need to re-implement on the subclasses @@ -7124,7 +7127,7 @@ def insert(self, loc: int, item) -> Index: new_values[loc] = item # GH#51363 stopped doing dtype inference here - out = Index(new_values, dtype=new_values.dtype, name=self.name) + out = Index(new_values, dtype=new_values.dtype, name=self.name, copy=False) return out def drop( @@ -7220,7 +7223,7 @@ def infer_objects(self, copy: bool = True) -> Index: ) if copy and res_values is values: return self.copy() - result = Index(res_values, name=self.name) + result = Index(res_values, name=self.name, copy=False) if not copy and res_values is values and self._references is not None: result._references = self._references result._references.add_index_reference(result) @@ -7329,10 +7332,10 @@ def _logical_method(self, other, op): def _construct_result(self, result, name, other): if isinstance(result, tuple): return ( - Index(result[0], name=name, dtype=result[0].dtype), - Index(result[1], name=name, dtype=result[1].dtype), + Index(result[0], name=name, dtype=result[0].dtype, copy=False), + Index(result[1], name=name, dtype=result[1].dtype, copy=False), ) - return Index(result, name=name, dtype=result.dtype) + return Index(result, name=name, dtype=result.dtype, copy=False) def _arith_method(self, other, op): if ( @@ -7350,7 +7353,7 @@ def _arith_method(self, other, op): @final def _unary_method(self, op): result = op(self._values) - return Index(result, name=self.name) + return Index(result, name=self.name, copy=False) def __abs__(self) -> Index: return self._unary_method(operator.abs) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index cbefaac77dd82..aee63a6ba4582 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -517,4 +517,4 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): Index(['first', 'second', nan], dtype='object') """ mapped = self._values.map(mapper, na_action=na_action) - return Index(mapped, name=self.name) + return Index(mapped, name=self.name, copy=False) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 72b009a344193..4a8cc611b1284 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -323,7 +323,7 @@ def strftime(self, date_format) -> Index: dtype='str') """ arr = self._data.strftime(date_format) - return Index(arr, name=self.name, dtype=arr.dtype) + return Index(arr, name=self.name, dtype=arr.dtype, copy=False) def tz_convert(self, tz) -> Self: """ diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 3b4fa372eec04..0c37aca23342f 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -74,7 +74,7 @@ def fget(self): return type(self)._simple_new(result, name=self.name) elif isinstance(result, ABCDataFrame): return result.set_index(self) - return Index(result, name=self.name, dtype=result.dtype) + return Index(result, name=self.name, dtype=result.dtype, copy=False) return result def fset(self, value) -> None: @@ -101,7 +101,7 @@ def method(self, *args, **kwargs): # type: ignore[misc] return type(self)._simple_new(result, name=self.name) elif isinstance(result, ABCDataFrame): return result.set_index(self) - return Index(result, name=self.name, dtype=result.dtype) + return Index(result, name=self.name, dtype=result.dtype, copy=False) return result # error: "property" has no attribute "__name__" diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1def317bc1a88..3bfacde94eb20 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -691,7 +691,7 @@ def _maybe_convert_i8(self, key): key_i8 = key_i8.view("i8") else: # DatetimeIndex/TimedeltaIndex - key_dtype, key_i8 = key.dtype, Index(key.asi8) + key_dtype, key_i8 = key.dtype, Index(key.asi8, copy=False) if key.hasnans: # convert NaT from its i8 value to np.nan so it's not viewed # as a valid value, maybe causing errors (e.g. is_overlapping) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3a0a1d8deacb3..8623a4bb061bb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1505,7 +1505,9 @@ def _get_values_for_csv( if len(new_levels) == 1: # a single-level multi-index - return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv() + return Index( + new_levels[0].take(new_codes[0]), copy=False + )._get_values_for_csv() else: # reconstruct the multi-index mi = MultiIndex( @@ -1732,10 +1734,10 @@ def is_monotonic_increasing(self) -> bool: # int, float, complex, str, bytes, _NestedSequence[Union # [bool, int, float, complex, str, bytes]]]" sort_order = np.lexsort(values) # type: ignore[arg-type] - return Index(sort_order).is_monotonic_increasing + return Index(sort_order, copy=False).is_monotonic_increasing except TypeError: # we have mixed types and np.lexsort is not happy - return Index(self._values).is_monotonic_increasing + return Index(self._values, copy=False).is_monotonic_increasing @cache_readonly def is_monotonic_decreasing(self) -> bool: @@ -1996,7 +1998,7 @@ def to_flat_index(self) -> Index: # type: ignore[override] ('bar', 'baz'), ('bar', 'qux')], dtype='object') """ - return Index(self._values, tupleize_cols=False) + return Index(self._values, tupleize_cols=False, copy=False) def _is_lexsorted(self) -> bool: """ @@ -2448,7 +2450,7 @@ def append(self, other): # setting names to None automatically return MultiIndex.from_tuples(new_tuples) except (TypeError, IndexError): - return Index(new_tuples) + return Index(new_tuples, copy=False) def argsort( self, *args, na_position: NaPosition = "last", **kwargs @@ -3077,7 +3079,7 @@ def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: lev = self.levels[0] codes = self._codes[0] cat = Categorical.from_codes(codes=codes, categories=lev, validate=False) - ci = Index(cat) + ci = Index(cat, copy=False) return ci.get_indexer_for(target) def get_slice_bound( diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b8a25ab0da693..a313a927a5d0b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -205,17 +205,17 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex: @property @doc(PeriodArray.hour.fget) def hour(self) -> Index: - return Index(self._data.hour, name=self.name) + return Index(self._data.hour, name=self.name, copy=False) @property @doc(PeriodArray.minute.fget) def minute(self) -> Index: - return Index(self._data.minute, name=self.name) + return Index(self._data.minute, name=self.name, copy=False) @property @doc(PeriodArray.second.fget) def second(self) -> Index: - return Index(self._data.second, name=self.name) + return Index(self._data.second, name=self.name, copy=False) # ------------------------------------------------------------------------ # Index Constructors @@ -410,7 +410,7 @@ def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray: Array of booleans where data is not NA. """ if isinstance(where, DatetimeIndex): - where = PeriodIndex(where._values, freq=self.freq) + where = PeriodIndex(where._values, freq=self.freq, copy=False) elif not isinstance(where, PeriodIndex): raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex") @@ -625,4 +625,4 @@ def period_range( data, freq = PeriodArray._generate_range(start, end, periods, freq) dtype = PeriodDtype(freq) data = PeriodArray(data, dtype=dtype) - return PeriodIndex(data, name=name) + return PeriodIndex(data, name=name, copy=False) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 4a935c2afb43e..9635cbc91364e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -604,7 +604,7 @@ def _shallow_copy(self, values, name: Hashable = no_default): name = self._name if name is no_default else name if values.dtype.kind == "f": - return Index(values, name=name, dtype=np.float64) + return Index(values, name=name, dtype=np.float64, copy=False) if values.dtype.kind == "i" and values.ndim == 1: # GH 46675 & 43885: If values is equally spaced, return a # more memory-compact RangeIndex instead of Index with 64-bit dtype @@ -1231,7 +1231,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: ) else: values = np.concatenate([x._values for x in rng_indexes]) - result = self._constructor(values) + result = self._constructor(values, copy=False) return result.rename(name) step = rng.start - start diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a476415d6c7c0..31145a552084b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -894,7 +894,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: ): # GH#38148 keys = self.obj.columns.union(key, sort=False) - diff = Index(key).difference(self.obj.columns, sort=False) + diff = Index(key, copy=False).difference(self.obj.columns, sort=False) if len(diff): # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B" diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f7fd4da2968a7..467d6ba2da9af 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2097,8 +2097,8 @@ def get_join_indexers( lkey = left_keys[0] rkey = right_keys[0] - left = Index(lkey) - right = Index(rkey) + left = Index(lkey, copy=False) + right = Index(rkey, copy=False) if ( left.is_monotonic_increasing @@ -2529,7 +2529,7 @@ def _convert_values_for_libjoin( self, values: AnyArrayLike, side: str ) -> np.ndarray: # we require sortedness and non-null values in the join keys - if not Index(values).is_monotonic_increasing: + if not Index(values, copy=False).is_monotonic_increasing: if isna(values).any(): raise ValueError(f"Merge keys contain null values on {side} side") raise ValueError(f"{side} keys must be sorted") diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index accb577d5345c..c3ac1ce0d0dc2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -490,7 +490,7 @@ def _unstack_multiple( if not rlocs: # Everything is in clocs, so the dummy df has a regular index - dummy_index = Index(obs_ids, name="__placeholder__") + dummy_index = Index(obs_ids, name="__placeholder__", copy=False) else: dummy_index = MultiIndex( levels=rlevels + [obs_ids], diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 659e82d979a91..d7ed603bd4ad4 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -446,7 +446,7 @@ def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index: else: bins[-1] += adj - return Index(bins) + return Index(bins, copy=False) def _bins_to_cuts( @@ -571,7 +571,7 @@ def _coerce_to_type(x: Index) -> tuple[Index, DtypeObj | None]: # https://github.com/pandas-dev/pandas/issues/31389 elif isinstance(x.dtype, ExtensionDtype) and is_numeric_dtype(x.dtype): x_arr = x.to_numpy(dtype=np.float64, na_value=np.nan) - x = Index(x_arr) + x = Index(x_arr, copy=False) return Index(x), dtype @@ -631,7 +631,7 @@ def _preprocess_for_cut(x) -> Index: if x.ndim != 1: raise ValueError("Input array must be 1 dimensional") - return Index(x) + return Index(x, copy=False) def _postprocess_for_cut(fac, bins, retbins: bool, original): diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 386681ceb1cf0..78174b0606fcf 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -383,7 +383,7 @@ def cons_row(x): out = out.get_level_values(0) return out else: - return Index(result, name=name, dtype=dtype) + return Index(result, name=name, dtype=dtype, copy=False) else: index = self._orig.index # This is a mess. @@ -703,7 +703,7 @@ def cat( if isna(result).all(): dtype = object # type: ignore[assignment] - out = Index(result, dtype=dtype, name=self._orig.name) + out = Index(result, dtype=dtype, name=self._orig.name, copy=False) else: # Series res_ser = Series( result, dtype=dtype, index=data.index, name=self._orig.name, copy=False diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c5c0aa4d61187..fc38f0ef35492 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -381,7 +381,7 @@ def _convert_listlike_datetimes( arg_array = arg_array._dt_tz_convert("UTC") else: arg_array = arg_array._dt_tz_localize("UTC") - arg = Index(arg_array) + arg = Index(arg_array, copy=False) else: # ArrowExtensionArray if arg_dtype.pyarrow_dtype.tz is not None: @@ -476,13 +476,13 @@ def _array_strptime_with_fallback( dta = DatetimeArray._simple_new(result, dtype=dtype) if utc: dta = dta.tz_convert("UTC") - return Index(dta, name=name) + return Index(dta, name=name, copy=False) elif result.dtype != object and utc: unit = np.datetime_data(result.dtype)[0] unit = cast("TimeUnit", unit) - res = Index(result, dtype=f"M8[{unit}, UTC]", name=name) + res = Index(result, dtype=f"M8[{unit}, UTC]", name=name, copy=False) return res - return Index(result, dtype=result.dtype, name=name) + return Index(result, dtype=result.dtype, name=name, copy=False) def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index bbdbe363d07ee..6789eb85b6c4b 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -241,5 +241,6 @@ def _convert_listlike( from pandas import TimedeltaIndex - value = TimedeltaIndex(td64arr, name=name) + copy = td64arr is arg or np.may_share_memory(arg, td64arr) + value = TimedeltaIndex(td64arr, name=name, copy=copy) return value diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index ae36a9a6176e9..4f4a28e630e9c 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -324,7 +324,9 @@ def _hash_ndarray( ) codes, categories = factorize(vals, sort=False) - tdtype = CategoricalDtype(categories=Index(categories), ordered=False) + tdtype = CategoricalDtype( + categories=Index(categories, copy=False), ordered=False + ) cat = Categorical._simple_new(codes, tdtype) return cat._hash_pandas_object( encoding=encoding, hash_key=hash_key, categorize=False diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index a6a5a7c23b506..64fd2836e87d4 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -377,7 +377,7 @@ def _agg_index(self, index) -> Index: ) if cast_type is not None: # Don't perform RangeIndex inference - idx = Index(arr, name=name, dtype=cast_type) + idx = Index(arr, name=name, dtype=cast_type, copy=False) else: idx = ensure_index_from_sequences([arr], [name]) arrays.append(idx) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 55a219eba1c61..66a67dc5d2b1d 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -509,7 +509,7 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi values, skipna=False, convert_na_value=False ) - cats = Index(values).unique().dropna() + cats = Index(values, copy=False).unique().dropna() values = Categorical._from_inferred_categories( cats, cats.get_indexer(values), cast_type, true_values=self.true_values ) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fa01fd5e4379c..4b4bc8708728a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2990,6 +2990,7 @@ def _get_index_factory(self, attrs): factory: Callable + kwargs = {} if index_class == DatetimeIndex: def f(values, freq=None, tz=None): @@ -3013,8 +3014,8 @@ def f(values, freq=None, tz=None): factory = f else: factory = index_class + kwargs["copy"] = False - kwargs = {} if "freq" in attrs: kwargs["freq"] = attrs["freq"] if index_class is Index: @@ -4451,7 +4452,7 @@ def read_coordinates( ) coords = coords[op(data.iloc[coords - coords.min()], filt).values] - return Index(coords) + return Index(coords, copy=False) def read_column( self, @@ -5183,15 +5184,15 @@ def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray if kind.startswith("datetime64"): if kind == "datetime64": # created before we stored resolution information - index = DatetimeIndex(data) + index = DatetimeIndex(data, copy=False) else: - index = DatetimeIndex(data.view(kind)) + index = DatetimeIndex(data.view(kind), copy=False) elif kind.startswith("timedelta64"): if kind == "timedelta64": # created before we stored resolution information - index = TimedeltaIndex(data) + index = TimedeltaIndex(data, copy=False) else: - index = TimedeltaIndex(data.view(kind)) + index = TimedeltaIndex(data.view(kind), copy=False) elif kind == "date": try: index = np.asarray([date.fromordinal(v) for v in data], dtype=object) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b44f595e73670..7ef6b6fe97e30 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1330,7 +1330,9 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame: if isinstance(ser.dtype, CategoricalDtype): cat = ser._values.remove_unused_categories() if cat.categories.dtype == object: - categories = pd.Index._with_infer(cat.categories._values) + categories = pd.Index._with_infer( + cat.categories._values, copy=False + ) cat = cat.set_categories(categories) elif cat.categories.dtype == "string" and len(cat.categories) == 0: # if the read categories are empty, it comes back as object dtype diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c4e6733b9a08d..196b3aadccaef 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -171,7 +171,7 @@ def infer_freq( ) if not isinstance(index, DatetimeIndex): - index = DatetimeIndex(index) + index = DatetimeIndex(index, copy=False) inferer = _FrequencyInferer(index) return inferer.get_freq()