@@ -998,8 +998,7 @@ def rank_1d(
998998
999999 N = len (values)
10001000 if labels is not None :
1001- # TODO(cython3): cast won't be necessary (#2992)
1002- assert < Py_ssize_t> len (labels) == N
1001+ assert len (labels) == N
10031002 out = np.empty(N)
10041003 grp_sizes = np.ones(N, dtype = np.int64)
10051004
@@ -1088,8 +1087,7 @@ cdef void rank_sorted_1d(
10881087 float64_t[::1 ] out,
10891088 int64_t[::1 ] grp_sizes,
10901089 const intp_t[:] sort_indexer,
1091- # TODO(cython3): make const (https://github.com/cython/cython/issues/3222)
1092- numeric_object_t[:] masked_vals,
1090+ const numeric_object_t[:] masked_vals,
10931091 const uint8_t[:] mask,
10941092 bint check_mask,
10951093 Py_ssize_t N,
@@ -1144,108 +1142,7 @@ cdef void rank_sorted_1d(
11441142 # array that we sorted previously, which gives us the location of
11451143 # that sorted value for retrieval back from the original
11461144 # values / masked_vals arrays
1147- # TODO(cython3): de-duplicate once cython supports conditional nogil
1148- if numeric_object_t is object :
1149- with gil:
1150- for i in range (N):
1151- at_end = i == N - 1
1152-
1153- # dups and sum_ranks will be incremented each loop where
1154- # the value / group remains the same, and should be reset
1155- # when either of those change. Used to calculate tiebreakers
1156- dups += 1
1157- sum_ranks += i - grp_start + 1
1158-
1159- next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160- masked_vals[sort_indexer[i+ 1 ]])
1161-
1162- # We'll need this check later anyway to determine group size, so just
1163- # compute it here since shortcircuiting won't help
1164- group_changed = at_end or (check_labels and
1165- (labels[sort_indexer[i]]
1166- != labels[sort_indexer[i+ 1 ]]))
1167-
1168- # Update out only when there is a transition of values or labels.
1169- # When a new value or group is encountered, go back #dups steps(
1170- # the number of occurrence of current value) and assign the ranks
1171- # based on the starting index of the current group (grp_start)
1172- # and the current index
1173- if (next_val_diff or group_changed or (check_mask and
1174- (mask[sort_indexer[i]]
1175- ^ mask[sort_indexer[i+ 1 ]]))):
1176-
1177- # If keep_na, check for missing values and assign back
1178- # to the result where appropriate
1179- if keep_na and check_mask and mask[sort_indexer[i]]:
1180- grp_na_count = dups
1181- for j in range (i - dups + 1 , i + 1 ):
1182- out[sort_indexer[j]] = NaN
1183- elif tiebreak == TIEBREAK_AVERAGE:
1184- for j in range (i - dups + 1 , i + 1 ):
1185- out[sort_indexer[j]] = sum_ranks / < float64_t> dups
1186- elif tiebreak == TIEBREAK_MIN:
1187- for j in range (i - dups + 1 , i + 1 ):
1188- out[sort_indexer[j]] = i - grp_start - dups + 2
1189- elif tiebreak == TIEBREAK_MAX:
1190- for j in range (i - dups + 1 , i + 1 ):
1191- out[sort_indexer[j]] = i - grp_start + 1
1192-
1193- # With n as the previous rank in the group and m as the number
1194- # of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
1195- # then rankings should be n + 1, n + 2 ... n + m
1196- elif tiebreak == TIEBREAK_FIRST:
1197- for j in range (i - dups + 1 , i + 1 ):
1198- out[sort_indexer[j]] = j + 1 - grp_start
1199-
1200- # If TIEBREAK_FIRST and descending, the ranking should be
1201- # n + m, n + (m - 1) ... n + 1. This is equivalent to
1202- # (i - dups + 1) + (i - j + 1) - grp_start
1203- elif tiebreak == TIEBREAK_FIRST_DESCENDING:
1204- for j in range (i - dups + 1 , i + 1 ):
1205- out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
1206- elif tiebreak == TIEBREAK_DENSE:
1207- for j in range (i - dups + 1 , i + 1 ):
1208- out[sort_indexer[j]] = grp_vals_seen
1209-
1210- # Look forward to the next value (using the sorting in
1211- # lexsort_indexer). If the value does not equal the current
1212- # value then we need to reset the dups and sum_ranks, knowing
1213- # that a new value is coming up. The conditional also needs
1214- # to handle nan equality and the end of iteration. If group
1215- # changes we do not record seeing a new value in the group
1216- if not group_changed and (next_val_diff or (check_mask and
1217- (mask[sort_indexer[i]]
1218- ^ mask[sort_indexer[i+ 1 ]]))):
1219- dups = sum_ranks = 0
1220- grp_vals_seen += 1
1221-
1222- # Similar to the previous conditional, check now if we are
1223- # moving to a new group. If so, keep track of the index where
1224- # the new group occurs, so the tiebreaker calculations can
1225- # decrement that from their position. Fill in the size of each
1226- # group encountered (used by pct calculations later). Also be
1227- # sure to reset any of the items helping to calculate dups
1228- if group_changed:
1229-
1230- # If not dense tiebreak, group size used to compute
1231- # percentile will be # of non-null elements in group
1232- if tiebreak != TIEBREAK_DENSE:
1233- grp_size = i - grp_start + 1 - grp_na_count
1234-
1235- # Otherwise, it will be the number of distinct values
1236- # in the group, subtracting 1 if NaNs are present
1237- # since that is a distinct value we shouldn't count
1238- else :
1239- grp_size = grp_vals_seen - (grp_na_count > 0 )
1240-
1241- for j in range (grp_start, i + 1 ):
1242- grp_sizes[sort_indexer[j]] = grp_size
1243-
1244- dups = sum_ranks = 0
1245- grp_na_count = 0
1246- grp_start = i + 1
1247- grp_vals_seen = 1
1248- else :
1145+ with gil(numeric_object_t is object ):
12491146 for i in range (N):
12501147 at_end = i == N - 1
12511148
@@ -1474,16 +1371,18 @@ ctypedef fused out_t:
14741371@ cython.boundscheck (False )
14751372@ cython.wraparound (False )
14761373def diff_2d (
1477- ndarray[diff_t , ndim = 2 ] arr, # TODO(cython3 ) update to "const diff_t[:, :] arr"
1478- ndarray[out_t , ndim = 2 ] out,
1374+ # TODO: cython bug (post Cython 3) prevents update to "const diff_t[:, :] arr"
1375+ ndarray[diff_t , ndim = 2 ] arr,
1376+ out_t[:, :] out ,
14791377 Py_ssize_t periods ,
14801378 int axis ,
14811379 bint datetimelike = False ,
14821380):
14831381 cdef:
14841382 Py_ssize_t i, j, sx, sy, start, stop
14851383 bint f_contig = arr.flags.f_contiguous
1486- # bint f_contig = arr.is_f_contig() # TODO(cython3)
1384+ # TODO: change to this when arr becomes a memoryview
1385+ # bint f_contig = arr.is_f_contig()
14871386 diff_t left, right
14881387
14891388 # Disable for unsupported dtype combinations,
0 commit comments