Merged · Changes from 5 commits
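The theme of this PR: wherever pandas builds an Index from data it already owns, pass copy=False explicitly so the constructor skips its defensive copy. A minimal sketch of the semantics, assuming a pandas build where Index(ndarray) would otherwise copy the input (the copy-on-write behavior these changes target):

import numpy as np
import pandas as pd

arr = np.arange(5)

# Default construction may defensively copy `arr`, so that mutating the
# array afterwards cannot reach the (supposed-to-be-immutable) Index.
idx = pd.Index(arr)

# copy=False asks the constructor to reuse the buffer as-is; this is only
# safe when the caller owns `arr`, as the internal call sites below do.
idx_nocopy = pd.Index(arr, copy=False)
print(np.shares_memory(arr, idx_nocopy.values))  # True: buffer reused
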
pandas/_testing/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -290,7 +290,7 @@ def box_expected(expected, box_cls, transpose: bool = True):
         else:
             expected = pd.array(expected, copy=False)
     elif box_cls is Index:
-        expected = Index(expected)
+        expected = Index(expected, copy=False)
     elif box_cls is Series:
         expected = Series(expected)
     elif box_cls is DataFrame:

pandas/conftest.py (2 changes: 1 addition & 1 deletion)
@@ -769,7 +769,7 @@ def index_with_missing(request):
     vals = ind.values.copy()
     vals[0] = None
     vals[-1] = None
-    return type(ind)(vals)
+    return type(ind)(vals, copy=False)
 
 
 # ----------------------------------------------------------------

pandas/core/algorithms.py (2 changes: 1 addition & 1 deletion)
@@ -926,7 +926,7 @@ def value_counts_internal(
 
         # Starting in 3.0, we no longer perform dtype inference on the
         # Index object we construct here, xref GH#56161
-        idx = Index(keys, dtype=keys.dtype, name=index_name)
+        idx = Index(keys, dtype=keys.dtype, name=index_name, copy=False)
 
         if (
             not sort

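The GH#56161 comment above is about dtype inference, not copying: pinning dtype=keys.dtype keeps the constructor from re-inferring a dtype out of the values. A hedged illustration (object-dtype timestamps are a case older pandas would have inferred to datetime64):

import numpy as np
import pandas as pd

keys = np.array(
    [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")], dtype=object
)

# dtype=keys.dtype pins the dtype the caller already has, skipping
# inference; copy=False additionally skips the defensive copy.
idx = pd.Index(keys, dtype=keys.dtype, name="ts", copy=False)
print(idx.dtype)  # object, not datetime64[ns]
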
pandas/core/apply.py (2 changes: 1 addition & 1 deletion)
@@ -1972,7 +1972,7 @@ def relabel_result(
             fun = [
                 com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
             ]
-            col_idx_order = Index(s.index).get_indexer(fun)
+            col_idx_order = Index(s.index, copy=False).get_indexer(fun)
             valid_idx = col_idx_order != -1
             if valid_idx.any():
                 s = s.iloc[col_idx_order[valid_idx]]

pandas/core/arrays/_mixins.py (2 changes: 1 addition & 1 deletion)
@@ -496,7 +496,7 @@ def value_counts(self, dropna: bool = True) -> Series:
         result = value_counts(values, sort=False, dropna=dropna)
 
         index_arr = self._from_backing_data(np.asarray(result.index._data))
-        index = Index(index_arr, name=result.index.name)
+        index = Index(index_arr, name=result.index.name, copy=False)
         return Series(result._values, index=index, name=result.name, copy=False)
 
     def _quantile(

pandas/core/arrays/arrow/array.py (2 changes: 1 addition & 1 deletion)
@@ -1804,7 +1804,7 @@ def value_counts(self, dropna: bool = True) -> Series:
 
         counts = ArrowExtensionArray(counts)
 
-        index = Index(self._from_pyarrow_array(values))
+        index = Index(self._from_pyarrow_array(values), copy=False)
 
         return Series(counts, index=index, name="count", copy=False)

pandas/core/arrays/categorical.py (4 changes: 2 additions & 2 deletions)
@@ -670,7 +670,7 @@ def _from_inferred_categories(
             to_timedelta,
         )
 
-        cats = Index(inferred_categories)
+        cats = Index(inferred_categories, copy=False)
         known_categories = (
             isinstance(dtype, CategoricalDtype) and dtype.categories is not None
         )
@@ -2397,7 +2397,7 @@ def _validate_listlike(self, value):
         from pandas import Index
 
         # tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914
-        to_add = Index._with_infer(value, tupleize_cols=False).difference(
+        to_add = Index._with_infer(value, tupleize_cols=False, copy=False).difference(
             self.categories
         )

pandas/core/arrays/masked.py (3 changes: 2 additions & 1 deletion)
@@ -1422,7 +1422,8 @@ def value_counts(self, dropna: bool = True) -> Series:
             self.dtype.construct_array_type()(
                 keys,  # type: ignore[arg-type]
                 mask_index,
-            )
+            ),
+            copy=False,
         )
         return Series(arr, index=index, name="count", copy=False)

pandas/core/arrays/sparse/array.py (2 changes: 1 addition & 1 deletion)
@@ -961,7 +961,7 @@ def value_counts(self, dropna: bool = True) -> Series:
             counts = np.insert(counts, 0, fcounts)
 
         if not isinstance(keys, ABCIndex):
-            index = Index(keys)
+            index = Index(keys, copy=False)
         else:
             index = keys
         return Series(counts, index=index, copy=False)

pandas/core/base.py (4 changes: 2 additions & 2 deletions)
@@ -1312,11 +1312,11 @@ def factorize(
             from pandas import Index
 
             try:
-                uniques = Index(uniques, dtype=self.dtype)
+                uniques = Index(uniques, dtype=self.dtype, copy=False)
             except NotImplementedError:
                 # not all dtypes are supported in Index that are allowed for Series
                 # e.g. float16 or bytes
-                uniques = Index(uniques)
+                uniques = Index(uniques, copy=False)
         return codes, uniques
 
     _shared_docs["searchsorted"] = """

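The try/except in factorize exists because Series admits dtypes that Index does not. A sketch of the fallback path, assuming float16 behaves as the comment describes; the exact dtype of the fallback uniques is an assumption here, not something this diff states:

import numpy as np
import pandas as pd

ser = pd.Series(np.array([0.5, 0.5, 1.5], dtype="float16"))
codes, uniques = ser.factorize()

# Index(uniques, dtype="float16") raises NotImplementedError internally,
# so the except branch builds the Index without forcing the Series dtype.
print(codes)          # [0 0 1]
print(uniques.dtype)  # assumption: upcast by the fallback, not float16
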
pandas/core/groupby/groupby.py (8 changes: 4 additions & 4 deletions)
@@ -1250,7 +1250,7 @@ def _set_result_index_ordered(
             return result
 
         # row order is scrambled => sort the rows by position in original index
-        original_positions = Index(self._grouper.result_ilocs)
+        original_positions = Index(self._grouper.result_ilocs, copy=False)
         result = result.set_axis(original_positions, axis=0)
         result = result.sort_index(axis=0)
         if self._grouper.has_dropped_na:
@@ -1298,7 +1298,7 @@ def _insert_inaxis_grouper(
             if qs is None:
                 result.insert(0, name, lev)
             else:
-                result.insert(0, name, Index(np.repeat(lev, len(qs))))
+                result.insert(0, name, Index(np.repeat(lev, len(qs)), copy=False))
 
         return result
 
@@ -4392,7 +4392,7 @@ def _nth(
             # error: No overload variant of "where" matches argument types
             # "Any", "NAType", "Any"
             values = np.where(nulls, NA, grouper)  # type: ignore[call-overload]
-            grouper = Index(values, dtype="Int64")
+            grouper = Index(values, dtype="Int64", copy=False)
 
         grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort)
         return grb.nth(n)
@@ -5806,7 +5806,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiIndex:
     MultiIndex
     """
     nqs = len(qs)
-    lev_codes, lev = Index(qs).factorize()
+    lev_codes, lev = Index(qs, copy=False).factorize()
     lev_codes = coerce_indexer_dtype(lev_codes, lev)
 
     if idx._is_multi:

pandas/core/groupby/grouper.py (6 changes: 4 additions & 2 deletions)
@@ -515,7 +515,9 @@ def __init__(
                 # error: Cannot determine type of "grouping_vector"  [has-type]
                 ng = newgrouper.groupings[0].grouping_vector  # type: ignore[has-type]
                 # use Index instead of ndarray so we can recover the name
-                grouping_vector = Index(ng, name=newgrouper.result_index.name)
+                grouping_vector = Index(
+                    ng, name=newgrouper.result_index.name, copy=False
+                )
 
         elif not isinstance(
             grouping_vector, (Series, Index, ExtensionArray, np.ndarray)
@@ -684,7 +686,7 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
     @cache_readonly
     def groups(self) -> dict[Hashable, Index]:
         codes, uniques = self._codes_and_uniques
-        uniques = Index._with_infer(uniques, name=self.name)
+        uniques = Index._with_infer(uniques, name=self.name, copy=False)
 
         r, counts = libalgos.groupsort_indexer(ensure_platform_int(codes), len(uniques))
         counts = ensure_int64(counts).cumsum()

pandas/core/groupby/ops.py (6 changes: 4 additions & 2 deletions)
@@ -730,7 +730,7 @@ def groups(self) -> dict[Hashable, Index]:
     @cache_readonly
     def is_monotonic(self) -> bool:
         # return if my group orderings are monotonic
-        return Index(self.ids).is_monotonic_increasing
+        return Index(self.ids, copy=False).is_monotonic_increasing
 
     @final
     @cache_readonly
@@ -760,7 +760,7 @@ def ids(self) -> npt.NDArray[np.intp]:
 
     @cache_readonly
     def result_index_and_ids(self) -> tuple[Index, npt.NDArray[np.intp]]:
-        levels = [Index._with_infer(ping.uniques) for ping in self.groupings]
+        levels = [
+            Index._with_infer(ping.uniques, copy=False) for ping in self.groupings
+        ]
         obs = [
             ping._observed or not ping._passed_categorical for ping in self.groupings
         ]

pandas/core/indexes/base.py (31 changes: 17 additions & 14 deletions)
@@ -300,6 +300,7 @@ def _new_Index(cls, d):
     """
     # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
+    d["copy"] = False
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex
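_new_Index is the reconstructor pickle calls when deserializing an Index; injecting d["copy"] = False here means the round-trip does not copy the freshly deserialized buffer a second time. A small usage sketch of the path this touches:

import pickle
import pandas as pd

idx = pd.Index([1, 2, 3], name="n")

# pickle reduces an Index to (_new_Index, ...); on load, _new_Index now
# forces copy=False before dispatching to the Index constructor.
restored = pickle.loads(pickle.dumps(idx))
print(restored.equals(idx), restored.name)  # True n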

@@ -692,7 +693,7 @@ def _with_infer(cls, *args, **kwargs):
         #  "ndarray[Any, Any]"
         values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
         if values.dtype.kind in "iufb":
-            return Index(values, name=result.name)
+            return Index(values, name=result.name, copy=False)
 
         return result

@@ -2775,7 +2776,7 @@ def fillna(self, value):
             # no need to care metadata other than name
             # because it can't have freq if it has NaTs
             # _with_infer needed for test_fillna_categorical
-            return Index._with_infer(result, name=self.name)
+            return Index._with_infer(result, name=self.name, copy=False)

[Review comment: Member] Strictly speaking, this one is not needed, I think, because the result returned from putmask above is an Index, so we already do a shallow copy by default. But no harm in keeping it to avoid confusion ;) (A quick check of that default follows this hunk.)

         return self._view()
 
     def dropna(self, how: AnyAll = "any") -> Self:
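As the reviewer notes, feeding an existing Index back into the constructor already yields a shallow copy by default. A quick check, assuming numpy-backed values:

import numpy as np
import pandas as pd

base = pd.Index([1.0, np.nan, 3.0])
relabeled = pd.Index(base, name="x")  # shallow copy: new object, shared data
print(np.shares_memory(base.values, relabeled.values))  # True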
Expand Down Expand Up @@ -3910,8 +3911,8 @@ def _get_fill_indexer(
if not (self.is_monotonic_increasing or self.is_monotonic_decreasing):
raise ValueError("index must be monotonic increasing or decreasing")
encoded = self.append(target)._engine.values # type: ignore[union-attr]
self_encoded = Index(encoded[: len(self)])
target_encoded = Index(encoded[len(self) :])
self_encoded = Index(encoded[: len(self)], copy=False)
target_encoded = Index(encoded[len(self) :], copy=False)
return self_encoded._get_fill_indexer(
target_encoded, method, limit, tolerance
)
Expand Down Expand Up @@ -4338,7 +4339,7 @@ def _reindex_non_unique(
new_indexer[~check] = -1

if not isinstance(self, ABCMultiIndex):
new_index = Index(new_labels, name=self.name)
new_index = Index(new_labels, name=self.name, copy=False)
else:
new_index = type(self).from_tuples(new_labels, names=self.names)
return new_index, indexer, new_indexer
Expand Down Expand Up @@ -4487,7 +4488,7 @@ def join(
and not self.categories.equals(other.categories)
):
# dtypes are "equal" but categories are in different order
other = Index(other._values.reorder_categories(self.categories))
other = Index(other._values.reorder_categories(self.categories), copy=False)

_validate_join_method(how)

Expand Down Expand Up @@ -4930,7 +4931,9 @@ def _wrap_join_result(
elif ridx is None:
join_index = other
else:
join_index = self._constructor._with_infer(joined, dtype=self.dtype)
join_index = self._constructor._with_infer(
joined, dtype=self.dtype, copy=False
)

names = other.names if how == "right" else self.names
if join_index.names != names:
Expand Down Expand Up @@ -6368,7 +6371,7 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
other = type(self).from_tuples(other) # type: ignore[attr-defined]
except (TypeError, ValueError):
# let's instead try with a straight Index
self = Index(self._values)
self = Index(self._values, copy=False)
[Review comment: Member] This ._values of a MultiIndex is essentially always already a copy?

[Reply: Member (Author)] Yes, but also any place we're passing in ._values it is always safe to not make a copy. If ._values is user-owned data, that is a problem in and of itself. (See the sketch below.)
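The exchange above is about why wrapping ._values without a copy is safe: a MultiIndex stores levels and codes, and the flat tuple array is materialized on demand, so no outside caller can hold a reference to it. An illustrative sketch (._values is internal API, used here only to make the point; tupleize_cols=False mirrors the to_flat_index hunk further down):

import pandas as pd

mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])

# The object-dtype tuple array below is freshly built from levels/codes,
# so reusing it with copy=False cannot alias user-owned memory.
flat = pd.Index(mi._values, tupleize_cols=False, copy=False)
print(flat)  # Index([('a', 1), ('b', 2)], dtype='object')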


         if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
             # Reverse op so we dont need to re-implement on the subclasses
@@ -7124,7 +7127,7 @@ def insert(self, loc: int, item) -> Index:
             new_values[loc] = item
 
         # GH#51363 stopped doing dtype inference here
-        out = Index(new_values, dtype=new_values.dtype, name=self.name)
+        out = Index(new_values, dtype=new_values.dtype, name=self.name, copy=False)
         return out
 
     def drop(
@@ -7220,7 +7223,7 @@ def infer_objects(self, copy: bool = True) -> Index:
         )
         if copy and res_values is values:
             return self.copy()
-        result = Index(res_values, name=self.name)
+        result = Index(res_values, name=self.name, copy=False)
         if not copy and res_values is values and self._references is not None:
             result._references = self._references
             result._references.add_index_reference(result)
@@ -7329,10 +7332,10 @@ def _logical_method(self, other, op):
     def _construct_result(self, result, name, other):
         if isinstance(result, tuple):
             return (
-                Index(result[0], name=name, dtype=result[0].dtype),
-                Index(result[1], name=name, dtype=result[1].dtype),
+                Index(result[0], name=name, dtype=result[0].dtype, copy=False),
+                Index(result[1], name=name, dtype=result[1].dtype, copy=False),
             )
-        return Index(result, name=name, dtype=result.dtype)
+        return Index(result, name=name, dtype=result.dtype, copy=False)
 
     def _arith_method(self, other, op):
         if (
@@ -7350,7 +7353,7 @@ def _arith_method(self, other, op):
     @final
     def _unary_method(self, op):
         result = op(self._values)
-        return Index(result, name=self.name)
+        return Index(result, name=self.name, copy=False)
 
     def __abs__(self) -> Index:
         return self._unary_method(operator.abs)

pandas/core/indexes/category.py (2 changes: 1 addition & 1 deletion)
@@ -517,4 +517,4 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
         Index(['first', 'second', nan], dtype='object')
         """
         mapped = self._values.map(mapper, na_action=na_action)
-        return Index(mapped, name=self.name)
+        return Index(mapped, name=self.name, copy=False)

pandas/core/indexes/datetimes.py (2 changes: 1 addition & 1 deletion)
@@ -323,7 +323,7 @@ def strftime(self, date_format) -> Index:
               dtype='str')
         """
         arr = self._data.strftime(date_format)
-        return Index(arr, name=self.name, dtype=arr.dtype)
+        return Index(arr, name=self.name, dtype=arr.dtype, copy=False)
 
     def tz_convert(self, tz) -> Self:
         """

pandas/core/indexes/extension.py (4 changes: 2 additions & 2 deletions)
@@ -74,7 +74,7 @@ def fget(self):
                 return type(self)._simple_new(result, name=self.name)
             elif isinstance(result, ABCDataFrame):
                 return result.set_index(self)
-            return Index(result, name=self.name, dtype=result.dtype)
+            return Index(result, name=self.name, dtype=result.dtype, copy=False)
         return result
 
     def fset(self, value) -> None:
@@ -101,7 +101,7 @@ def method(self, *args, **kwargs):  # type: ignore[misc]
                 return type(self)._simple_new(result, name=self.name)
             elif isinstance(result, ABCDataFrame):
                 return result.set_index(self)
-            return Index(result, name=self.name, dtype=result.dtype)
+            return Index(result, name=self.name, dtype=result.dtype, copy=False)
         return result
 
     # error: "property" has no attribute "__name__"

pandas/core/indexes/interval.py (2 changes: 1 addition & 1 deletion)
@@ -691,7 +691,7 @@ def _maybe_convert_i8(self, key):
                 key_i8 = key_i8.view("i8")
         else:
             # DatetimeIndex/TimedeltaIndex
-            key_dtype, key_i8 = key.dtype, Index(key.asi8)
+            key_dtype, key_i8 = key.dtype, Index(key.asi8, copy=False)
             if key.hasnans:
                 # convert NaT from its i8 value to np.nan so it's not viewed
                 # as a valid value, maybe causing errors (e.g. is_overlapping)

pandas/core/indexes/multi.py (14 changes: 8 additions & 6 deletions)
@@ -1505,7 +1505,9 @@ def _get_values_for_csv(
 
         if len(new_levels) == 1:
             # a single-level multi-index
-            return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
+            return Index(
+                new_levels[0].take(new_codes[0]), copy=False
+            )._get_values_for_csv()
         else:
             # reconstruct the multi-index
             mi = MultiIndex(
@@ -1732,10 +1734,10 @@ def is_monotonic_increasing(self) -> bool:
             #  int, float, complex, str, bytes, _NestedSequence[Union
             #  [bool, int, float, complex, str, bytes]]]"
             sort_order = np.lexsort(values)  # type: ignore[arg-type]
-            return Index(sort_order).is_monotonic_increasing
+            return Index(sort_order, copy=False).is_monotonic_increasing
         except TypeError:
             # we have mixed types and np.lexsort is not happy
-            return Index(self._values).is_monotonic_increasing
+            return Index(self._values, copy=False).is_monotonic_increasing
 
     @cache_readonly
     def is_monotonic_decreasing(self) -> bool:
@@ -1996,7 +1998,7 @@ def to_flat_index(self) -> Index:  # type: ignore[override]
                 ('bar', 'baz'), ('bar', 'qux')],
               dtype='object')
         """
-        return Index(self._values, tupleize_cols=False)
+        return Index(self._values, tupleize_cols=False, copy=False)
 
     def _is_lexsorted(self) -> bool:
         """
@@ -2448,7 +2450,7 @@ def append(self, other):
                 # setting names to None automatically
                 return MultiIndex.from_tuples(new_tuples)
             except (TypeError, IndexError):
-                return Index(new_tuples)
+                return Index(new_tuples, copy=False)
 
     def argsort(
         self, *args, na_position: NaPosition = "last", **kwargs
@@ -3077,7 +3079,7 @@ def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
         lev = self.levels[0]
         codes = self._codes[0]
         cat = Categorical.from_codes(codes=codes, categories=lev, validate=False)
-        ci = Index(cat)
+        ci = Index(cat, copy=False)
         return ci.get_indexer_for(target)
 
     def get_slice_bound(