Skip to content

Commit 16b396d

Browse files
committed
Revert "Merge pull request ClickHouse#62747 from bigo-sg/percent_rank"
This reverts commit 532eb28, reversing changes made to 6113df4.
1 parent c0bd212 commit 16b396d

4 files changed

Lines changed: 32 additions & 217 deletions

File tree

docs/en/sql-reference/window-functions/index.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ ClickHouse supports the standard grammar for defining windows and window functio
2424
| `GROUPS` frame | ❌ |
2525
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
2626
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
27-
| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`|
2827
| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
2928
| ntile(buckets) | ✅ <br/> Specify the window like this: (partition by x order by y rows between unbounded preceding and unbounded following). |
3029

src/Processors/Transforms/WindowTransform.cpp

Lines changed: 32 additions & 182 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@
2020
#include <Functions/IFunction.h>
2121
#include <DataTypes/DataTypeString.h>
2222

23-
#include <Poco/Logger.h>
24-
#include <Common/logger_useful.h>
25-
2623
#include <limits>
2724

2825

@@ -82,9 +79,6 @@ class IWindowFunction
8279
virtual std::optional<WindowFrame> getDefaultFrame() const { return {}; }
8380

8481
virtual ColumnPtr castColumn(const Columns &, const std::vector<size_t> &) { return nullptr; }
85-
86-
/// Is the frame type supported by this function.
87-
virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; }
8882
};
8983

9084
// Compares ORDER BY column values at given rows to find the boundaries of frame:
@@ -416,19 +410,6 @@ WindowTransform::WindowTransform(const Block & input_header_,
416410
}
417411
}
418412
}
419-
420-
for (const auto & workspace : workspaces)
421-
{
422-
if (workspace.window_function_impl)
423-
{
424-
if (!workspace.window_function_impl->checkWindowFrameType(this))
425-
{
426-
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'",
427-
workspace.aggregate_function->getName());
428-
}
429-
}
430-
431-
}
432413
}
433414

434415
WindowTransform::~WindowTransform()
@@ -1639,34 +1620,6 @@ struct WindowFunctionHelpers
16391620
{
16401621
recurrent_detail::setValueToOutputColumn<T>(transform, function_index, value);
16411622
}
1642-
1643-
ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; }
1644-
1645-
ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform)
1646-
{
1647-
/// This is for fast check.
1648-
if (!transform->partition_ended)
1649-
return false;
1650-
1651-
auto current_row = transform->current_row;
1652-
/// checkPartitionEnterLastRow is called on each row, also move on current_row.row here.
1653-
current_row.row++;
1654-
const auto & partition_end_row = transform->partition_end;
1655-
1656-
/// The partition end is reached, when following is true
1657-
/// - current row is the partition end row,
1658-
/// - or current row is the last row of all input.
1659-
if (current_row != partition_end_row)
1660-
{
1661-
/// when current row is not the partition end row, we need to check whether it's the last
1662-
/// input row.
1663-
if (current_row.row < transform->blockRowsNumber(current_row))
1664-
return false;
1665-
if (partition_end_row.block != current_row.block + 1 || partition_end_row.row)
1666-
return false;
1667-
}
1668-
return true;
1669-
}
16701623
};
16711624

16721625
template<typename State>
@@ -2116,6 +2069,8 @@ namespace
21162069
const WindowTransform * transform,
21172070
size_t function_index,
21182071
const DataTypes & argument_types);
2072+
2073+
static void checkWindowFrameType(const WindowTransform * transform);
21192074
};
21202075
}
21212076

@@ -2136,29 +2091,6 @@ struct WindowFunctionNtile final : public StatefulWindowFunction<NtileState>
21362091

21372092
bool allocatesMemoryInArena() const override { return false; }
21382093

2139-
bool checkWindowFrameType(const WindowTransform * transform) const override
2140-
{
2141-
if (transform->order_by_indices.empty())
2142-
{
2143-
LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have ORDER BY clause");
2144-
return false;
2145-
}
2146-
2147-
// We must wait for the end of the partition to learn the total number of rows in
2148-
// this partition. So, until the partition ends, none of its blocks can be
2149-
// dropped.
2150-
bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded
2151-
&& transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded;
2152-
if (!is_frame_supported)
2153-
{
2154-
LOG_ERROR(
2155-
getLogger("WindowFunctionNtile"),
2156-
"Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'");
2157-
return false;
2158-
}
2159-
return true;
2160-
}
2161-
21622094
std::optional<WindowFrame> getDefaultFrame() const override
21632095
{
21642096
WindowFrame frame;
@@ -2185,6 +2117,7 @@ namespace
21852117
{
21862118
if (!buckets) [[unlikely]]
21872119
{
2120+
checkWindowFrameType(transform);
21882121
const auto & current_block = transform->blockAt(transform->current_row);
21892122
const auto & workspace = transform->workspaces[function_index];
21902123
const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]];
@@ -2206,7 +2139,7 @@ namespace
22062139
}
22072140
}
22082141
// new partition
2209-
if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]]
2142+
if (transform->current_row_number == 1) [[unlikely]]
22102143
{
22112144
current_partition_rows = 0;
22122145
current_partition_inserted_row = 0;
@@ -2215,9 +2148,25 @@ namespace
22152148
current_partition_rows++;
22162149

22172150
// Only do the action when we meet the last row in this partition.
2218-
if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform))
2151+
if (!transform->partition_ended)
22192152
return;
2153+
else
2154+
{
2155+
auto current_row = transform->current_row;
2156+
current_row.row++;
2157+
const auto & end_row = transform->partition_end;
2158+
if (current_row != end_row)
2159+
{
22202160

2161+
if (current_row.row < transform->blockRowsNumber(current_row))
2162+
return;
2163+
if (end_row.block != current_row.block + 1 || end_row.row)
2164+
{
2165+
return;
2166+
}
2167+
// else, current_row is the last input row.
2168+
}
2169+
}
22212170
auto bucket_capacity = current_partition_rows / buckets;
22222171
auto capacity_diff = current_partition_rows - bucket_capacity * buckets;
22232172

@@ -2255,115 +2204,23 @@ namespace
22552204
bucket_num += 1;
22562205
}
22572206
}
2258-
}
2259-
2260-
namespace
2261-
{
2262-
struct PercentRankState
2263-
{
2264-
RowNumber start_row;
2265-
UInt64 current_partition_rows = 0;
2266-
};
2267-
}
22682207

2269-
struct WindowFunctionPercentRank final : public StatefulWindowFunction<PercentRankState>
2270-
{
2271-
public:
2272-
WindowFunctionPercentRank(const std::string & name_,
2273-
const DataTypes & argument_types_, const Array & parameters_)
2274-
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
2275-
{}
2276-
2277-
bool allocatesMemoryInArena() const override { return false; }
2278-
2279-
bool checkWindowFrameType(const WindowTransform * transform) const override
2280-
{
2281-
if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE
2282-
|| transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded
2283-
|| transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current)
2284-
{
2285-
LOG_ERROR(
2286-
getLogger("WindowFunctionPercentRank"),
2287-
"Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'");
2288-
return false;
2289-
}
2290-
return true;
2291-
}
2292-
2293-
std::optional<WindowFrame> getDefaultFrame() const override
2294-
{
2295-
WindowFrame frame;
2296-
frame.type = WindowFrame::FrameType::RANGE;
2297-
frame.begin_type = WindowFrame::BoundaryType::Unbounded;
2298-
frame.end_type = WindowFrame::BoundaryType::Current;
2299-
return frame;
2300-
}
2301-
2302-
void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override
2208+
void NtileState::checkWindowFrameType(const WindowTransform * transform)
23032209
{
2304-
auto & state = getWorkspaceState(transform, function_index);
2305-
if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform))
2306-
{
2307-
state.current_partition_rows = 0;
2308-
state.start_row = transform->current_row;
2309-
}
2310-
2311-
insertRankIntoColumn(transform, function_index);
2312-
state.current_partition_rows++;
2313-
2314-
if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform))
2315-
{
2316-
return;
2317-
}
2318-
2319-
UInt64 remaining_rows = state.current_partition_rows;
2320-
Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1;
2210+
if (transform->order_by_indices.empty())
2211+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause");
23212212

2322-
while (remaining_rows > 0)
2213+
// We must wait for the end of the partition to learn the total number of rows in
2214+
// this partition. So, until the partition ends, none of its blocks can be
2215+
// dropped.
2216+
bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded
2217+
&& transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded;
2218+
if (!is_frame_supported)
23232219
{
2324-
auto block_rows_number = transform->blockRowsNumber(state.start_row);
2325-
auto available_block_rows = block_rows_number - state.start_row.row;
2326-
if (available_block_rows <= remaining_rows)
2327-
{
2328-
/// This partition involves multiple blocks. Finish current block and move on to the
2329-
/// next block.
2330-
auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index];
2331-
auto & data = assert_cast<ColumnFloat64 &>(to_column).getData();
2332-
for (size_t i = state.start_row.row; i < block_rows_number; ++i)
2333-
data[i] = (data[i] - 1) / percent_rank_denominator;
2334-
2335-
state.start_row.block++;
2336-
state.start_row.row = 0;
2337-
remaining_rows -= available_block_rows;
2338-
}
2339-
else
2340-
{
2341-
/// The partition ends in the current block.
2342-
auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index];
2343-
auto & data = assert_cast<ColumnFloat64 &>(to_column).getData();
2344-
for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i)
2345-
{
2346-
data[i] = (data[i] - 1) / percent_rank_denominator;
2347-
}
2348-
state.start_row.row += remaining_rows;
2349-
remaining_rows = 0;
2350-
}
2220+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'");
23512221
}
23522222
}
2353-
2354-
2355-
inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const
2356-
{
2357-
const auto & workspace = transform->workspaces[function_index];
2358-
return getState(workspace);
2359-
}
2360-
2361-
inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const
2362-
{
2363-
auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index];
2364-
assert_cast<ColumnFloat64 &>(to_column).getData().push_back(static_cast<Float64>(transform->peer_group_start_row_number));
2365-
}
2366-
};
2223+
}
23672224

23682225
// ClickHouse-specific variant of lag/lead that respects the window frame.
23692226
template <bool is_lead>
@@ -2779,13 +2636,6 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
27792636
parameters);
27802637
}, properties}, AggregateFunctionFactory::Case::Insensitive);
27812638

2782-
factory.registerFunction("percent_rank", {[](const std::string & name,
2783-
const DataTypes & argument_types, const Array & parameters, const Settings *)
2784-
{
2785-
return std::make_shared<WindowFunctionPercentRank>(name, argument_types,
2786-
parameters);
2787-
}, properties}, AggregateFunctionFactory::Case::Insensitive);
2788-
27892639
factory.registerFunction("row_number", {[](const std::string & name,
27902640
const DataTypes & argument_types, const Array & parameters, const Settings *)
27912641
{

tests/queries/0_stateless/01592_window_functions.reference

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,3 @@ iPhone 900 Smartphone 500 500
7979
Kindle Fire 150 Tablet 150 350
8080
Samsung Galaxy Tab 200 Tablet 175 350
8181
iPad 700 Tablet 350 350
82-
---- Q8 ----
83-
Lenovo Thinkpad Laptop 700 1 0
84-
Sony VAIO Laptop 700 1 0
85-
Dell Vostro Laptop 800 3 0.6666666666666666
86-
HP Elite Laptop 1200 4 1
87-
Microsoft Lumia Smartphone 200 1 0
88-
HTC One Smartphone 400 2 0.3333333333333333
89-
Nexus Smartphone 500 3 0.6666666666666666
90-
iPhone Smartphone 900 4 1
91-
Kindle Fire Tablet 150 1 0
92-
Samsung Galaxy Tab Tablet 200 2 0.5
93-
iPad Tablet 700 3 1
94-
Others Unknow 200 1 0

tests/queries/0_stateless/01592_window_functions.sql

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -101,26 +101,5 @@ SELECT
101101
FROM products INNER JOIN product_groups USING (group_id)) t
102102
order by group_name, product_name, price;
103103

104-
select '---- Q8 ----';
105-
INSERT INTO product_groups VALUES (4, 'Unknow');
106-
INSERT INTO products (product_id,product_name, group_id,price) VALUES (12, 'Others', 4, 200);
107-
108-
SELECT *
109-
FROM
110-
(
111-
SELECT
112-
product_name,
113-
group_name,
114-
price,
115-
rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank,
116-
percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent
117-
FROM products
118-
INNER JOIN product_groups USING (group_id)
119-
) AS t
120-
ORDER BY
121-
group_name ASC,
122-
price ASC,
123-
product_name ASC;
124-
125104
drop table product_groups;
126105
drop table products;

0 commit comments

Comments
 (0)