Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion contrib/kvproto
Submodule kvproto updated 67 files
+7 −0 .editorconfig
+6 −0 Makefile
+1 −1 OWNERS_ALIASES
+10 −0 README.md
+101 −101 include/eraftpb.proto
+33 −33 include/rustproto.proto
+553 −305 pkg/brpb/brpb.pb.go
+92 −92 pkg/cdcpb/cdcpb.pb.go
+3 −3 pkg/configpb/configpb.pb.go
+442 −94 pkg/coprocessor/coprocessor.pb.go
+145 −146 pkg/debugpb/debugpb.pb.go
+5 −5 pkg/disaggregated/disaggregated.pb.go
+9 −9 pkg/disk_usage/disk_usage.pb.go
+46 −46 pkg/enginepb/enginepb.pb.go
+82 −82 pkg/errorpb/errorpb.pb.go
+55 −55 pkg/import_kvpb/import_kvpb.pb.go
+244 −155 pkg/import_sstpb/import_sstpb.pb.go
+40 −39 pkg/keyspacepb/keyspacepb.pb.go
+1,466 −544 pkg/kvrpcpb/kvrpcpb.pb.go
+168 −60 pkg/mpp/mpp.pb.go
+873 −587 pkg/pdpb/pdpb.pb.go
+172 −172 pkg/raft_cmdpb/raft_cmdpb.pb.go
+144 −144 pkg/raft_serverpb/raft_serverpb.pb.go
+184 −124 pkg/resource_manager/resource_manager.pb.go
+21 −21 pkg/routerpb/routerpb.pb.go
+94 −94 pkg/schedulingpb/schedulingpb.pb.go
+250 −177 pkg/tikvpb/tikvpb.pb.go
+59 −59 pkg/tsopb/tsopb.pb.go
+23 −25 proto/autoid.proto
+732 −726 proto/brpb.proto
+135 −137 proto/cdcpb.proto
+77 −81 proto/configpb.proto
+133 −114 proto/coprocessor.proto
+46 −47 proto/deadlock.proto
+179 −191 proto/debugpb.proto
+53 −53 proto/diagnosticspb.proto
+95 −97 proto/disaggregated.proto
+9 −10 proto/disk_usage.proto
+95 −97 proto/encryptionpb.proto
+47 −48 proto/enginepb.proto
+108 −111 proto/errorpb.proto
+5 −6 proto/gcpb.proto
+74 −83 proto/import_kvpb.proto
+324 −327 proto/import_sstpb.proto
+12 −14 proto/keyspacepb.proto
+1,187 −1,155 proto/kvrpcpb.proto
+26 −29 proto/logbackuppb.proto
+100 −102 proto/meta_storagepb.proto
+101 −103 proto/metapb.proto
+55 −56 proto/mpp.proto
+809 −801 proto/pdpb.proto
+215 −218 proto/raft_cmdpb.proto
+207 −209 proto/raft_serverpb.proto
+42 −42 proto/recoverdatapb.proto
+40 −40 proto/replication_modepb.proto
+52 −48 proto/resource_manager.proto
+27 −29 proto/resource_usage_agent.proto
+13 −15 proto/routerpb.proto
+138 −140 proto/schedulingpb.proto
+261 −254 proto/tikvpb.proto
+32 −33 proto/tracepb.proto
+73 −75 proto/tsopb.proto
+11 −1 scripts/check.sh
+207 −3 scripts/common.sh
+4 −5 scripts/generate_cpp.sh
+716 −508 scripts/proto.lock
+139 −0 scripts/proto_format.sh
4 changes: 3 additions & 1 deletion dbms/src/Flash/Coprocessor/DAGContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ DAGContext::DAGContext(tipb::DAGRequest & dag_request_, const mpp::TaskMeta & me
, resource_group_name(meta_.resource_group_name())
, connection_id(meta_.connection_id())
, connection_alias(meta_.connection_alias())
, sql_digest(meta_.sql_digest())
, plan_digest(meta_.plan_digest())
{
if (dag_request->has_div_precision_increment())
div_precision_increment = dag_request->div_precision_increment();
Expand Down Expand Up @@ -483,7 +485,7 @@ UInt64 DAGContext::getReadBytes() const
UInt64 read_bytes = 0;
for (const auto & [id, sc] : scan_context_map)
{
(void)id; // Disable unused variable warnning.
(void)id; // Disable unused variable warning.
read_bytes += sc->userReadBytes();
}
return read_bytes;
Expand Down
4 changes: 4 additions & 0 deletions dbms/src/Flash/Coprocessor/DAGContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,8 @@ class DAGContext

UInt64 getConnectionID() const { return connection_id; }
const String & getConnectionAlias() const { return connection_alias; }
const String & getSQLDigest() const { return sql_digest; }
const String & getPlanDigest() const { return plan_digest; }

MPPReceiverSetPtr getMPPReceiverSet() const { return mpp_receiver_set; }

Expand Down Expand Up @@ -544,6 +546,8 @@ class DAGContext
UInt64 connection_id;
// It's the session alias between mysql client and tidb
String connection_alias;
String sql_digest;
String plan_digest;

String query_id_and_cte_id_for_sink;
std::unordered_map<size_t, String> query_id_and_cte_id_for_sources;
Expand Down
7 changes: 5 additions & 2 deletions dbms/src/Flash/FlashService.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,14 @@ grpc::Status FlashService::DispatchMPPTask(
const auto & resource_group = task_meta.resource_group_name();
LOG_INFO(
log,
"Handling mpp dispatch request, task: {}, resource_group: {}, conn_id: {}, conn_alias: {}",
"Handling mpp dispatch request, task: {}, resource_group: {}, conn_id: {}, conn_alias: {}, "
"sql_digest: {}, plan_digest: {}",
MPPTaskId(task_meta).toString(),
resource_group,
task_meta.connection_id(),
task_meta.connection_alias());
task_meta.connection_alias(),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P0] Unbounded sql_digest accepted from gRPC can cause OOM/disk-fill DoS

Why: sql_digest is accepted from gRPC without any length or format validation and is logged at INFO level before security checks (checkGrpcContext at line 504). With gRPC max receive size set to unlimited (SetMaxReceiveMessageSize(-1) in FlashGrpcServerHolder.cpp:172), a single malicious request can force very large allocations and log writes, potentially OOM-crashing the TiFlash process or rapidly consuming disk space.

Evidence:

  • Untrusted input logged before security check: FlashService.cpp:488-503
  • Copied into DAGContext without validation: DAGContext.cpp:107 (sql_digest(meta_.sql_digest()))
  • Unlimited gRPC receive size: FlashGrpcServerHolder.cpp:172
  • Default bind on all interfaces: 0.0.0.0:3930

Fix: Enforce max size (e.g., 128 bytes) and expected pattern (fixed hex digest length) on sql_digest before storing/logging. Truncate or hash if oversized.

task_meta.sql_digest(),
task_meta.plan_digest());
auto check_result = checkGrpcContext(grpc_context);
if (!check_result.ok())
return check_result;
Expand Down
9 changes: 9 additions & 0 deletions dbms/src/Flash/Mpp/MPPTaskStatistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ void MPPTaskStatistics::initializeExecutorDAG(DAGContext * dag_context_)

is_root = dag_context->isRootMPPTask();
sender_executor_id = root_executor.executor_id();
connection_id = dag_context->getConnectionID();
connection_alias = dag_context->getConnectionAlias();
sql_digest = dag_context->getSQLDigest();
plan_digest = dag_context->getPlanDigest();
executor_statistics_collector.initialize(dag_context);
}

Expand Down Expand Up @@ -110,6 +114,7 @@ void MPPTaskStatistics::logTracingJson()
/// don't use info log for initializing status since it does not contains too many information
status == INITIALIZING ? Poco::Message::PRIO_DEBUG : Poco::Message::PRIO_INFORMATION,
R"({{"query_tso":{},"task_id":{},"is_root":{},"sender_executor_id":"{}","executors":{},"host":"{}")"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P2] JSON tracing log builds JSON without escaping user-controlled fields

Why: logTracingJson() constructs JSON using fmt::format and interpolates connection_alias and sql_digest as raw strings (\"{}\"). If these fields contain quotes, backslashes, or control characters, the emitted JSON becomes invalid, breaking downstream log parsers and ETL pipelines that expect valid JSON.

Evidence:

  • Line 116: R\"(,\"connection_id\":{},\"connection_alias\":\"{}\",\"sql_digest\":\"{}\")\" with raw string interpolation
  • Fields sourced from protobuf without sanitization: DAGContext.cpp:84-107

Fix: JSON-escape connection_alias, sql_digest, and other string fields (e.g., error_message, status) before interpolation, or use a proper JSON serialization library instead of manual string formatting.

R"(,"connection_id":{},"connection_alias":"{}","sql_digest":"{}","plan_digest":"{}")"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@windtalker I think logging it only in the "Handling mpp dispatch request" message would be enough?

It is useful to also add sql_digest to the final report logging in MPPTaskStatistics.cpp. The MPPTaskStatistics.cpp logging contains rich information, such as how much RU was consumed, which executors were used, and how much data was scanned by the storage layer. We can filter logs containing MPPTaskStatistics.cpp and related information using Loki queries according to our needs.

For example, in the internal case of O11Y-2155585, we can filter the SQLs executed on TiFlash that contain only a TopN executor with a Loki query like

{...", container=~"serverlog.*"} |= `MPPTaskStatistics` |~ `TopN`

and tidb log by Loki query like

{... container=~".*tidb"} |= `local_mpp_coordinator.go` |~ `TopN`

If there are

[2026/03/05 03:19:40.409 +00:00] [INFO] [MPPTaskStatistics.cpp:139] ["{\"query_tso\":464697630464938873,\"task_id\":1,\"is_root\":true,\"sender_executor_id\":\"ExchangeSender_53\",\"executors\":[{\"id\":\"ExchangeSender_53\",\"type\":\"ExchangeSender\",\"children\":[\"TopN_52\"],\"outbound_rows\":500,\"outbound_blocks\":1,\"outbound_bytes\":44960,\"outbound_allocated_bytes\":53248,\"concurrency\":32,\"execution_time_ns\":89997426,\"partition_num\":1,\"sender_target_task_ids\":[-1],\"exchange_type\":\"PassThrough\",\"connection_details\":[{\"tunnel_id\":\"tunnel1+-1\",\"sender_target_task_id\":-1,\"sender_target_host\":\"172.26.26.37:33298\",\"is_local\":false,\"packets\":1,\"bytes\":43054}]},{\"id\":\"Selection_51\",\"type\":\"Selection\",\"children\":[\"TableFullScan_50\"],\"outbound_rows\":608139,\"outbound_blocks\":206,\"outbound_bytes\":52208714,\"outbound_allocated_bytes\":74575488,\"concurrency\":33,\"execution_time_ns\":79997710},{\"id\":\"TableFullScan_50\",\"type\":\"TableScan\",\"children\":[],\"outbound_rows\":1020425,\"outbound_blocks\":206,\"outbound_bytes\":76121302,\"outbound_allocated_bytes\":250056320,\"concurrency\":33,\"execution_time_ns\":79997710,\"connection_details\":[{\"is_local\":true,\"packets\":0,\"bytes\":75914204,\"max\":39.998855,\"min\":29.999141},{\"is_local\":false,\"packets\":1,\"bytes\":137933,\"max\":79.99771,\"min\":79.99771}],\"scan_details\":{\"build_bitmap_time\":\"174.074ms\",\"build_stream_time\":\"177.412ms\",\"create_snapshot_time\":\"0.089ms\",\"delta_bytes\":255338442,\"delta_rows\":243061,\"dmfile_data_scanned_rows\":1634016,\"dmfile_data_skipped_rows\":7913032,\"dmfile_lm_filter_scanned_rows\":1650083,\"dmfile_lm_filter_skipped_rows\":7896965,\"dmfile_mvcc_scanned_rows\":1650083,\"dmfile_mvcc_skipped_rows\":7896965,\"dmfile_read_time\":\"133.610ms\",\"late_materialization_skip_rows\":16067,\"learner_read_time\":\"1.632ms\",\"local_max_stream_cost_ms\":\"39.999ms\",\"local_min_stream_cost_ms\":\"29.999ms\",\"mvcc_input_
bytes\":27660819,\"mvcc_input_rows\":1627107,\"mvcc_read_bytes\":30523857,\"mvcc_skip_rows\":515147,\"num_columns\":6,\"num_local_region\":39,\"num_read_tasks\":15,\"num_remote_region\":1,\"num_segments\":15,\"num_stale_read\":0,\"pushdown\":{\"rs_operator\":{\"children\":[{\"op\":\"unsupported\",\"reason\":\"ColumnRef with field type is not supported, sig=EQString field_type=15\"},{\"op\":\"unsupported\",\"reason\":\"ColumnRef with field type is not supported, sig=EQString field_type=15\"},{\"children\":[{\"op\":\"unsupported\",\"reason\":\"ColumnRef with field type is not supported, sig=NEString field_type=15\"},{\"op\":\"unsupported\",\"reason\":\"ColumnRef with field type is not supported, sig=NEString field_type=15\"}],\"op\":\"or\"},{\"col\":\"id\",\"op\":\"greater\",\"value\":\"?\"}],\"op\":\"and\"}},\"query_read_bytes\":106567779,\"read_mode\":\"Bitmap\",\"region_num_of_instance\":[{\"instance_id\":\"db-tiflash-0.db-tiflash-peer.tidb10518506906906971664.svc:3930\",\"region_num\":39}],\"remote_max_stream_cost_ms\":\"79.998ms\",\"remote_min_stream_cost_ms\":\"79.998ms\",\"rs_dmfile_read_with_all\":0,\"rs_pack_filter_all\":0,\"rs_pack_filter_all_null\":0,\"rs_pack_filter_check_time\":\"1.480ms\",\"rs_pack_filter_none\":1008,\"rs_pack_filter_some\":216}},{\"id\":\"TopN_52\",\"type\":\"TopN\",\"children\":[\"Selection_51\"],\"outbound_rows\":500,\"outbound_blocks\":1,\"outbound_bytes\":44960,\"outbound_allocated_bytes\":53248,\"concurrency\":32,\"execution_time_ns\":89997426}],\"host\":\"db-tiflash-0.db-tiflash-peer.tidb10518506906906971664.svc:3930\",\"task_init_timestamp\":1772680780318360000,\"task_start_timestamp\":1772680780321330000,\"task_end_timestamp\":1772680780409195000,\"compile_start_timestamp\":1772680780318604000,\"compile_end_timestamp\":1772680780321318000,\"read_wait_index_start_timestamp\":1772680780320297000,\"read_wait_index_end_timestamp\":1772680780320332000,\"local_input_bytes\":75914204,\"remote_input_bytes\":207098,\"output_bytes\":0,\"s
tatus\":\"FINISHED\",\"error_message\":\"\",\"cpu_ru\":13.333333333333334,\"read_ru\":2091.8523559570312,\"memory_peak\":198287670,\"extra_info\":{\"cpu_execute_time_ns\":39998858,\"cpu_pending_time_ns\":0,\"io_execute_time_ns\":0,\"io_pending_time_ns\":0,\"await_time_ns\":79997710,\"wait_for_notify_time_ns\":4209879560}}"] [source="mpp_task_tracing MPP<gather_id:1, query_ts:1772680780316129293, local_query_id:18885517, server_id:1490, start_ts:464697630464938873,task_id:1>"] [thread_id=8526]
[2026/03/05 03:19:40.317 +00:00] [INFO] [local_mpp_coordinator.go:222] ["Dispatch mpp task"] [timestamp=464697630464938873] [ID=1] [QueryTs=1772680780316129293] [LocalQueryId=18885517] [ServerID=1490] [address=db-tiflash-0.db-tiflash-peer.tidb10518506906906971664.svc:3930] [plan="Table(dh_account_basic)->Sel([eq(dh_app_250.dh_account_basic.site_code, 250) eq(dh_app_250.dh_account_basic.register_currency, BRL) or(ne(dh_app_250.dh_account_basic.mtpush_id, ), ne(dh_app_250.dh_account_basic.jg_web_id, ))])->TopN([dh_app_250.dh_account_basic.id],0,500)->Send(-1, )"] [mpp-version=2] [exchange-compression-mode=NONE] [GatherID=1] [resource_group=default]
[2026/03/05 03:19:40.317 +00:00] [INFO] [local_mpp_coordinator.go:222] ["Dispatch mpp task"] [timestamp=464697630464938873] [ID=2] [QueryTs=1772680780316129293] [LocalQueryId=18885517] [ServerID=1490] [address=db-tiflash-3.db-tiflash-peer.tidb10518506906906971664.svc:3930] [plan="Table(dh_account_basic)->Sel([eq(dh_app_250.dh_account_basic.site_code, 250) eq(dh_app_250.dh_account_basic.register_currency, BRL) or(ne(dh_app_250.dh_account_basic.mtpush_id, ), ne(dh_app_250.dh_account_basic.jg_web_id, ))])->TopN([dh_app_250.dh_account_basic.id],0,500)->Send(-1, )"] [mpp-version=2] [exchange-compression-mode=NONE] [GatherID=1] [resource_group=default]
[2026/03/05 03:19:40.317 +00:00] [INFO] [local_mpp_coordinator.go:222] ["Dispatch mpp task"] [timestamp=464697630464938873] [ID=3] [QueryTs=1772680780316129293] [LocalQueryId=18885517] [ServerID=1490] [address=db-tiflash-1.db-tiflash-peer.tidb10518506906906971664.svc:3930] [plan="Table(dh_account_basic)->Sel([eq(dh_app_250.dh_account_basic.site_code, 250) eq(dh_app_250.dh_account_basic.register_currency, BRL) or(ne(dh_app_250.dh_account_basic.mtpush_id, ), ne(dh_app_250.dh_account_basic.jg_web_id, ))])->TopN([dh_app_250.dh_account_basic.id],0,500)->Send(-1, )"] [mpp-version=2] [exchange-compression-mode=NONE] [GatherID=1] [resource_group=default]
[2026/03/05 03:19:40.317 +00:00] [INFO] [local_mpp_coordinator.go:222] ["Dispatch mpp task"] [timestamp=464697630464938873] [ID=4] [QueryTs=1772680780316129293] [LocalQueryId=18885517] [ServerID=1490] [address=db-tiflash-2.db-tiflash-peer.tidb10518506906906971664.svc:3930] [plan="Table(dh_account_basic)->Sel([eq(dh_app_250.dh_account_basic.site_code, 250) eq(dh_app_250.dh_account_basic.register_currency, BRL) or(ne(dh_app_250.dh_account_basic.mtpush_id, ), ne(dh_app_250.dh_account_basic.jg_web_id, ))])->TopN([dh_app_250.dh_account_basic.id],0,500)->Send(-1, )"] [mpp-version=2] [exchange-compression-mode=NONE] [GatherID=1] [resource_group=default]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we log the sql_digest in MPPTaskStatistics.cpp, then we can

  1. use a Loki filter as follows and directly collect the sql_digest from the returned rows
 |= `MPPTaskStatistics` |~ `TopN`

If we only log the sql_digest in the "Handling mpp dispatch request" message, then we have to

  1. filter the logs containing "MPPTaskStatistics.cpp" and "TopN", then collect the start_ts values
  2. filter the logs containing "Handling mpp dispatch request" and the collected start_ts values, then collect the "sql_digest" values.

R"(,"task_init_timestamp":{},"task_start_timestamp":{},"task_end_timestamp":{})"
R"(,"compile_start_timestamp":{},"compile_end_timestamp":{})"
R"(,"read_wait_index_start_timestamp":{},"read_wait_index_end_timestamp":{})"
Expand All @@ -121,6 +126,10 @@ void MPPTaskStatistics::logTracingJson()
sender_executor_id,
executor_statistics_collector.profilesToJson(),
host,
connection_id,
connection_alias,
sql_digest,
plan_digest,
toNanoseconds(task_init_timestamp),
toNanoseconds(task_start_timestamp),
toNanoseconds(task_end_timestamp),
Expand Down
4 changes: 4 additions & 0 deletions dbms/src/Flash/Mpp/MPPTaskStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ class MPPTaskStatistics
// executor dag
bool is_root = false;
String sender_executor_id;
UInt64 connection_id = 0;
String connection_alias;
String sql_digest;
String plan_digest;

// resource
RUConsumption ru_info{.cpu_ru = 0.0, .cpu_time_ns = 0, .read_ru = 0.0, .read_bytes = 0};
Expand Down