From c163fad36358e1d310a1c796221ed6b6e2f18eba Mon Sep 17 00:00:00 2001 From: Leto_b Date: Thu, 25 Sep 2025 16:11:45 +0800 Subject: [PATCH] update data-sync params --- .../Table/User-Manual/Data-Sync_apache.md | 4 +- .../Table/User-Manual/Data-Sync_timecho.md | 6 +- .../Tree/User-Manual/Data-Sync_apache.md | 69 +++++++------ .../Tree/User-Manual/Data-Sync_timecho.md | 75 +++++++++------ .../User-Manual/Data-Sync_apache.md | 8 +- .../User-Manual/Data-Sync_timecho.md | 6 +- .../latest/User-Manual/Data-Sync_apache.md | 69 +++++++------ .../latest/User-Manual/Data-Sync_timecho.md | 75 +++++++++------ .../Table/User-Manual/Data-Sync_apache.md | 4 +- .../Table/User-Manual/Data-Sync_timecho.md | 16 ++-- .../Tree/User-Manual/Data-Sync_apache.md | 96 ++++++++++--------- .../Tree/User-Manual/Data-Sync_timecho.md | 77 ++++++++------- .../User-Manual/Data-Sync_apache.md | 4 +- .../User-Manual/Data-Sync_timecho.md | 16 ++-- .../latest/User-Manual/Data-Sync_apache.md | 96 ++++++++++--------- .../latest/User-Manual/Data-Sync_timecho.md | 77 ++++++++------- 16 files changed, 392 insertions(+), 306 deletions(-) diff --git a/src/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md b/src/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md index 6c04f4d1c..ee9f3085d 100644 --- a/src/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md +++ b/src/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md @@ -534,7 +534,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :----------------------------------------------------------- | :------- | :------------ | | sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. 
| Integer | No | 1 | @@ -552,7 +552,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | diff --git a/src/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md b/src/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md index 1e07c55f0..00e64f82d 100644 --- a/src/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md +++ b/src/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md @@ -584,7 +584,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------| :------- | :------------ | | sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. 
| Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | @@ -601,7 +601,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :------------ | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | | compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | @@ -615,7 +615,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | diff --git a/src/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md b/src/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md index 696832a20..63575991b 100644 --- a/src/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md +++ b/src/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md @@ -480,53 +480,62 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 ## 5. 
Reference: parameter description -### 5.1 source parameter(V1.3.3) +### 5.1 source parameter | key | value | value range | required or not | default value | | :------------------------------ | :----------------------------------------------------------- | :------------------------------------- | :------- | :------------- | | source | iotdb-source | String: iotdb-source | Required | - | | inclusion | Used to specify the range of data to be synchronized in the data synchronization task, including data, schema, and auth | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | data.insert | | inclusion.exclusion | Used to exclude specific operations from the range specified by inclusion, reducing the amount of data synchronized | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | - | -| path | Used to filter the path pattern schema of time series and data to be synchronized / schema synchronization can only use pathpath is exact matching, parameters must be prefix paths or complete paths, i.e., cannot contain `"*"`, at most one `"**"` at the end of the path parameter | String:IoTDB pattern | Optional | root.** | -| pattern | Used to filter the path prefix of time series | String: Optional | Optional | root | +| mode.streaming | Specifies the capture source for time-series data writes. Applicable when mode.streamingis false, determining the source for capturing data.insertspecified in inclusion. Offers two strategies:- true: ​​Dynamic capture selection.​​ The system adaptively chooses between capturing individual write requests or only TsFile sealing requests based on downstream processing speed. Prioritizes capturing write requests for lower latency when processing is fast; captures only file sealing requests to avoid backlog when slow. Suitable for most scenarios, balancing latency and throughput optimally.- false: ​​Fixed batch capture.​​ Captures only TsFile sealing requests. Suitable for resource-constrained scenarios to reduce system load. Note: The snapshot data captured upon pipe startup is only provided to downstream processing in file format. | Boolean: true / false | 否 | true | +| mode.strict | Determines the strictness when filtering data using time/ path/ database-name/ table-nameparameters:- true: ​​Strict filtering.​​ The system strictly filters captured data according to the given conditions, ensuring only matching data is selected.- false: ​​Non-strict filtering.​​ The system may include some extra data during filtering. Suitable for performance-sensitive scenarios to reduce CPU and I/O consumption. | Boolean: true / false | Optional | true | +| mode.snapshot | Determines the capture mode for time-series data, affecting the dataspecified in inclusion. Offers two modes:- true: ​​Static data capture.​​ Upon pipe startup, a one-time data snapshot is captured. ​​The pipe will automatically terminate (DROP PIPE SQL is executed automatically) after the snapshot data is fully consumed.​​- false: ​​Dynamic data capture.​​ In addition to capturing a snapshot upon startup, the pipe continuously captures subsequent data changes. The pipe runs continuously to handle the dynamic data stream. | Boolean: true / false | Optional | false | +| path | Can be specified when the user connects with sql_dialectset to tree. For upgraded user pipes, the default sql_dialectis tree. This parameter determines the capture scope for time-series data, affecting the dataspecified in inclusion, as well as some sequence-related metadata. 
Data is selected into the streaming pipe if its tree model path matches the specified path. | String: IoTDB-standard tree path pattern, wildcards allowed | Optional | root.** | | start-time | The start event time for synchronizing all data, including start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MIN_VALUE | | end-time | The end event time for synchronizing all data, including end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MAX_VALUE | -| realtime.mode | The extraction mode for newly inserted data (after pipe creation) | String: batch | Optional | batch | | forwarding-pipe-requests | Whether to forward data written by other Pipes (usually data synchronization) | Boolean: true | Optional | true | -| history.loose-range | When transferring TsFile, whether to relax the range of historical data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| realtime.loose-range | When transferring TsFile, whether to relax the range of real-time data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| mods.enable | Whether to send the mods file of tsfile | Boolean: true / false | Optional | false | -> 💎 **Explanation**:To maintain compatibility with lower versions, history.enable, history.start-time, history.end-time, realtime.enable can still be used, but they are not recommended in the new version. +> 💎 **Note:** The difference between the values of true and false for the data extraction mode `mode.streaming` > -> 💎 **Explanation: Differences between Stream and Batch Data Extraction Modes** -> - **stream (recommended)**: In this mode, tasks process and send data in real-time. It is characterized by high timeliness and low throughput. -> - **batch**: In this mode, tasks process and send data in batches (according to the underlying data files). It is characterized by low timeliness and high throughput. - +> - True (recommended): Under this value, the task will process and send the data in real-time. Its characteristics are high timeliness and low throughput. +> - False: Under this value, the task will process and send the data in batches (according to the underlying data files). Its characteristics are low timeliness and high throughput. ### 5.2 sink parameter -> In versions 1.3.3 and above, when only the sink is included, the additional "with sink" prefix is no longer required. 
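Before the per-sink parameter tables, a minimal sketch of how the source options above combine with the sink options below in a single `CREATE PIPE` statement may help; the pipe name, node address, credentials, path, and start time here are illustrative placeholders, not values taken from this manual:

```sql
CREATE PIPE example_sync
WITH SOURCE (
  'source'         = 'iotdb-source',
  'path'           = 'root.db.**',
  'start-time'     = '1704038400000',
  'mode.streaming' = 'true'
)
WITH SINK (
  'sink'           = 'iotdb-thrift-sink',
  'node-urls'      = '192.168.0.2:6667',
  'username'       = 'root',
  'password'       = 'root',
  'batch.enable'   = 'true',
  'compressor'     = 'lz4'
)
```

The same structure applies to the other sinks in this section; only the `sink` value and its sink-specific options (for example `ssl.trust-store-path` for `iotdb-thrift-ssl-sink`) change.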
#### iotdb-thrift-sink - -| key | value | value Range | required or not | Default Value | -| :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Required | | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :----------------------------------------------------------- | :------- | :------------ | +| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. | Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | The maximum number of bytes allowed to be transmitted per second. The compressed bytes (such as after compression) are calculated. If it is less than 0, there is no limit. 
| Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, whether the receiver waits for the local load tsfile operation to complete before responding to the sender:
sync: Wait for the local load tsfile operation to complete before returning the response.
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | #### iotdb-thrift-ssl-sink -| key | value | value Range | required or not | Default Value | -| :---------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Required | - | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | -| ssl.trust-store-path | The trust store certificate path required to connect to the target DataNode | String: certificate directory name, when configured as a relative directory, it is relative to the IoTDB root directory. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667'| Required | - | -| ssl.trust-store-pwd | The trust store certificate password required to connect to the target DataNode | Integer | Required | - | + +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | +| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. | Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. 
Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | Maximum bytes allowed per second for transmission (calculated after compression). Set to a value less than 0 for no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.
async: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| ssl.trust-store-path | Path to the trust store certificate for SSL connection. | String. A relative path is resolved against the IoTDB root directory. | Yes | - | +| ssl.trust-store-pwd | Password for the trust store certificate. | String | Yes | - | +| format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | diff --git a/src/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md b/src/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md index 280980344..39fa9f999 100644 --- a/src/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md +++ b/src/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md @@ -573,37 +573,37 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | source | iotdb-source | String: iotdb-source | Required | - | | inclusion | Used to specify the range of data to be synchronized in the data synchronization task, including data, schema, and auth | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | data.insert | | inclusion.exclusion | Used to exclude specific operations from the range specified by inclusion, reducing the amount of data synchronized | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | - | -| path | Used to filter the path pattern schema of time series and data to be synchronized / schema synchronization can only use pathpath is exact matching, parameters must be prefix paths or complete paths, i.e., cannot contain `"*"`, at most one `"**"` at the end of the path parameter | String:IoTDB pattern | Optional | root.** | -| pattern | Used to filter the path prefix of time series | String: Optional | Optional | root | +| mode.streaming | Specifies the capture source for time-series data writes. Applicable when mode.streamingis false, determining the source for capturing data.insertspecified in inclusion. Offers two strategies:- true: ​​Dynamic capture selection.​​ The system adaptively chooses between capturing individual write requests or only TsFile sealing requests based on downstream processing speed. Prioritizes capturing write requests for lower latency when processing is fast; captures only file sealing requests to avoid backlog when slow. Suitable for most scenarios, balancing latency and throughput optimally.- false: ​​Fixed batch capture.​​ Captures only TsFile sealing requests. Suitable for resource-constrained scenarios to reduce system load. Note: The snapshot data captured upon pipe startup is only provided to downstream processing in file format. | Boolean: true / false | 否 | true | +| mode.strict | Determines the strictness when filtering data using time/ path/ database-name/ table-nameparameters:- true: ​​Strict filtering.​​ The system strictly filters captured data according to the given conditions, ensuring only matching data is selected.- false: ​​Non-strict filtering.​​ The system may include some extra data during filtering. Suitable for performance-sensitive scenarios to reduce CPU and I/O consumption. | Boolean: true / false | Optional | true | +| mode.snapshot | Determines the capture mode for time-series data, affecting the dataspecified in inclusion. Offers two modes:- true: ​​Static data capture.​​ Upon pipe startup, a one-time data snapshot is captured. ​​The pipe will automatically terminate (DROP PIPE SQL is executed automatically) after the snapshot data is fully consumed.​​- false: ​​Dynamic data capture.​​ In addition to capturing a snapshot upon startup, the pipe continuously captures subsequent data changes. The pipe runs continuously to handle the dynamic data stream. 
| Boolean: true / false | Optional | false | +| path | Can be specified when the user connects with sql_dialectset to tree. For upgraded user pipes, the default sql_dialectis tree. This parameter determines the capture scope for time-series data, affecting the dataspecified in inclusion, as well as some sequence-related metadata. Data is selected into the streaming pipe if its tree model path matches the specified path. | String: IoTDB-standard tree path pattern, wildcards allowed | Optional | root.** | | start-time | The start event time for synchronizing all data, including start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MIN_VALUE | | end-time | The end event time for synchronizing all data, including end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MAX_VALUE | -| realtime.mode | The extraction mode for newly inserted data (after pipe creation) | String: stream, batch | Optional | stream | -| forwarding-pipe-requests | Whether to forward data written by other Pipes (usually data synchronization) | Boolean: true, false | Optional | true | -| history.loose-range | When transferring TsFile, whether to relax the range of historical data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| realtime.loose-range | When transferring TsFile, whether to relax the range of real-time data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| mods.enable | Whether to send the mods file of tsfile | Boolean: true / false | Optional | false | +| forwarding-pipe-requests | Whether to forward data written by other Pipes (usually data synchronization) | Boolean: true | Optional | true | -> 💎 **Explanation**:To maintain compatibility with lower versions, history.enable, history.start-time, history.end-time, realtime.enable can still be used, but they are not recommended in the new version. +> 💎 **Note:** The difference between the values of true and false for the data extraction mode `mode.streaming` > -> 💎 **Explanation: Differences between Stream and Batch Data Extraction Modes** -> - **stream (recommended)**: In this mode, tasks process and send data in real-time. It is characterized by high timeliness and low throughput. -> - **batch**: In this mode, tasks process and send data in batches (according to the underlying data files). It is characterized by low timeliness and high throughput. - +> - True (recommended): Under this value, the task will process and send the data in real-time. Its characteristics are high timeliness and low throughput. 
+> - False: Under this value, the task will process and send the data in batches (according to the underlying data files). Its characteristics are low timeliness and high throughput. ### 5.2 sink parameter -> In versions 1.3.3 and above, when only the sink is included, the additional "with sink" prefix is no longer required. - #### iotdb-thrift-sink - -| key | value | value Range | required or not | Default Value | -| :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Required | | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :----------------------------------------------------------- | :------- | :------------ | +| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. | Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. 
| String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | The maximum number of bytes allowed to be transmitted per second. The compressed bytes (such as after compression) are calculated. If it is less than 0, there is no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | #### iotdb-air-gap-sink @@ -611,16 +611,29 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | Required | - | | node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB | String. Example: :'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | Maximum bytes allowed per second for transmission (calculated after compression). Set to a value less than 0 for no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | | air-gap.handshake-timeout-ms | The timeout duration of the handshake request when the sender and receiver first attempt to establish a connection, unit: ms | Integer | Optional | 5000 | #### iotdb-thrift-ssl-sink -| key | value | value Range | required or not | Default Value | -| :---------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Required | - | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | -| ssl.trust-store-path | The trust store certificate path required to connect to the target DataNode | String: certificate directory name, when configured as a relative directory, it is relative to the IoTDB root directory. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667'| Required | - | -| ssl.trust-store-pwd | The trust store certificate password required to connect to the target DataNode | Integer | Required | - | +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | +| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. 
| Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | Maximum bytes allowed per second for transmission (calculated after compression). Set to a value less than 0 for no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.
async: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| ssl.trust-store-path | Path to the trust store certificate for SSL connection. | String. A relative path is resolved against the IoTDB root directory. | Yes | - | +| ssl.trust-store-pwd | Password for the trust store certificate. | String | Yes | - | +| format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | diff --git a/src/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md b/src/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md index 6c04f4d1c..77cc2c688 100644 --- a/src/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md +++ b/src/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md @@ -534,7 +534,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :----------------------------------------------------------- | :------- | :------------ | | sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | @@ -542,7 +542,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | | compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | | rate-limit-bytes-per-second | The maximum number of bytes allowed to be transmitted per second. The compressed bytes (such as after compression) are calculated. If it is less than 0, there is no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | -| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
​​sync​​: Wait for the local load tsfile operation to complete before returning the response.
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | | format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | #### iotdb-thrift-ssl-sink @@ -552,7 +552,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | @@ -560,7 +560,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | | compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | | rate-limit-bytes-per-second | Maximum bytes allowed per second for transmission (calculated after compression). Set to a value less than 0 for no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | -| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
​​sync​​: Wait for the local load tsfile operation to complete before returning the response.
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | | ssl.trust-store-path | Path to the trust store certificate for SSL connection. | String.Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | | ssl.trust-store-pwd | Password for the trust store certificate. | Integer | Yes | - | | format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | \ No newline at end of file diff --git a/src/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md b/src/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md index 1e07c55f0..00e64f82d 100644 --- a/src/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md +++ b/src/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md @@ -584,7 +584,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------| :------- | :------------ | | sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | @@ -601,7 +601,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :------------ | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | | compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. 
| Int: [-131072, 22] | No | 3 | @@ -615,7 +615,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | | node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | -| user/usename | Usename for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| user/username | username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | | password | Password for the username. | String | No | root | | batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | | batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | diff --git a/src/UserGuide/latest/User-Manual/Data-Sync_apache.md b/src/UserGuide/latest/User-Manual/Data-Sync_apache.md index 696832a20..63575991b 100644 --- a/src/UserGuide/latest/User-Manual/Data-Sync_apache.md +++ b/src/UserGuide/latest/User-Manual/Data-Sync_apache.md @@ -480,53 +480,62 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 ## 5. Reference: parameter description -### 5.1 source parameter(V1.3.3) +### 5.1 source parameter | key | value | value range | required or not | default value | | :------------------------------ | :----------------------------------------------------------- | :------------------------------------- | :------- | :------------- | | source | iotdb-source | String: iotdb-source | Required | - | | inclusion | Used to specify the range of data to be synchronized in the data synchronization task, including data, schema, and auth | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | data.insert | | inclusion.exclusion | Used to exclude specific operations from the range specified by inclusion, reducing the amount of data synchronized | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | - | -| path | Used to filter the path pattern schema of time series and data to be synchronized / schema synchronization can only use pathpath is exact matching, parameters must be prefix paths or complete paths, i.e., cannot contain `"*"`, at most one `"**"` at the end of the path parameter | String:IoTDB pattern | Optional | root.** | -| pattern | Used to filter the path prefix of time series | String: Optional | Optional | root | +| mode.streaming | Specifies the capture source for time-series data writes. Applicable when mode.streamingis false, determining the source for capturing data.insertspecified in inclusion. 
Offers two strategies:- true: ​​Dynamic capture selection.​​ The system adaptively chooses between capturing individual write requests or only TsFile sealing requests based on downstream processing speed. Prioritizes capturing write requests for lower latency when processing is fast; captures only file sealing requests to avoid backlog when slow. Suitable for most scenarios, balancing latency and throughput optimally.- false: ​​Fixed batch capture.​​ Captures only TsFile sealing requests. Suitable for resource-constrained scenarios to reduce system load. Note: The snapshot data captured upon pipe startup is only provided to downstream processing in file format. | Boolean: true / false | 否 | true | +| mode.strict | Determines the strictness when filtering data using time/ path/ database-name/ table-nameparameters:- true: ​​Strict filtering.​​ The system strictly filters captured data according to the given conditions, ensuring only matching data is selected.- false: ​​Non-strict filtering.​​ The system may include some extra data during filtering. Suitable for performance-sensitive scenarios to reduce CPU and I/O consumption. | Boolean: true / false | Optional | true | +| mode.snapshot | Determines the capture mode for time-series data, affecting the dataspecified in inclusion. Offers two modes:- true: ​​Static data capture.​​ Upon pipe startup, a one-time data snapshot is captured. ​​The pipe will automatically terminate (DROP PIPE SQL is executed automatically) after the snapshot data is fully consumed.​​- false: ​​Dynamic data capture.​​ In addition to capturing a snapshot upon startup, the pipe continuously captures subsequent data changes. The pipe runs continuously to handle the dynamic data stream. | Boolean: true / false | Optional | false | +| path | Can be specified when the user connects with sql_dialectset to tree. For upgraded user pipes, the default sql_dialectis tree. This parameter determines the capture scope for time-series data, affecting the dataspecified in inclusion, as well as some sequence-related metadata. Data is selected into the streaming pipe if its tree model path matches the specified path. | String: IoTDB-standard tree path pattern, wildcards allowed | Optional | root.** | | start-time | The start event time for synchronizing all data, including start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MIN_VALUE | | end-time | The end event time for synchronizing all data, including end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MAX_VALUE | -| realtime.mode | The extraction mode for newly inserted data (after pipe creation) | String: batch | Optional | batch | | forwarding-pipe-requests | Whether to forward data written by other Pipes (usually data synchronization) | Boolean: true | Optional | true | -| history.loose-range | When transferring TsFile, whether to relax the range of historical data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. 
| String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| realtime.loose-range | When transferring TsFile, whether to relax the range of real-time data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| mods.enable | Whether to send the mods file of tsfile | Boolean: true / false | Optional | false | -> 💎 **Explanation**:To maintain compatibility with lower versions, history.enable, history.start-time, history.end-time, realtime.enable can still be used, but they are not recommended in the new version. +> 💎 **Note:** The difference between the values of true and false for the data extraction mode `mode.streaming` > -> 💎 **Explanation: Differences between Stream and Batch Data Extraction Modes** -> - **stream (recommended)**: In this mode, tasks process and send data in real-time. It is characterized by high timeliness and low throughput. -> - **batch**: In this mode, tasks process and send data in batches (according to the underlying data files). It is characterized by low timeliness and high throughput. - +> - True (recommended): Under this value, the task will process and send the data in real-time. Its characteristics are high timeliness and low throughput. +> - False: Under this value, the task will process and send the data in batches (according to the underlying data files). Its characteristics are low timeliness and high throughput. ### 5.2 sink parameter -> In versions 1.3.3 and above, when only the sink is included, the additional "with sink" prefix is no longer required. #### iotdb-thrift-sink - -| key | value | value Range | required or not | Default Value | -| :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Required | | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. 
Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :----------------------------------------------------------- | :------- | :------------ | +| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. | Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | The maximum number of bytes allowed to be transmitted per second. The compressed bytes (such as after compression) are calculated. If it is less than 0, there is no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.<br>
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | #### iotdb-thrift-ssl-sink -| key | value | value Range | required or not | Default Value | -| :---------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Required | - | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | -| ssl.trust-store-path | The trust store certificate path required to connect to the target DataNode | String: certificate directory name, when configured as a relative directory, it is relative to the IoTDB root directory. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667'| Required | - | -| ssl.trust-store-pwd | The trust store certificate password required to connect to the target DataNode | Integer | Required | - | + +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | +| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. | Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. 
Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | Maximum bytes allowed per second for transmission (calculated after compression). Set to a value less than 0 for no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.<br>
async: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | | ssl.trust-store-path | Path to the trust store certificate for SSL connection. | String: certificate path (a relative path is resolved against the IoTDB root directory) | Yes | - | | ssl.trust-store-pwd | Password for the trust store certificate. | Integer | Yes | - | | format | The payload formats for data transmission include the following options:<br>
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | diff --git a/src/UserGuide/latest/User-Manual/Data-Sync_timecho.md b/src/UserGuide/latest/User-Manual/Data-Sync_timecho.md index 280980344..39fa9f999 100644 --- a/src/UserGuide/latest/User-Manual/Data-Sync_timecho.md +++ b/src/UserGuide/latest/User-Manual/Data-Sync_timecho.md @@ -573,37 +573,37 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | source | iotdb-source | String: iotdb-source | Required | - | | inclusion | Used to specify the range of data to be synchronized in the data synchronization task, including data, schema, and auth | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | data.insert | | inclusion.exclusion | Used to exclude specific operations from the range specified by inclusion, reducing the amount of data synchronized | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | Optional | - | -| path | Used to filter the path pattern schema of time series and data to be synchronized / schema synchronization can only use pathpath is exact matching, parameters must be prefix paths or complete paths, i.e., cannot contain `"*"`, at most one `"**"` at the end of the path parameter | String:IoTDB pattern | Optional | root.** | -| pattern | Used to filter the path prefix of time series | String: Optional | Optional | root | +| mode.streaming | Specifies the capture source for time-series data writes. Applicable when mode.streamingis false, determining the source for capturing data.insertspecified in inclusion. Offers two strategies:- true: ​​Dynamic capture selection.​​ The system adaptively chooses between capturing individual write requests or only TsFile sealing requests based on downstream processing speed. Prioritizes capturing write requests for lower latency when processing is fast; captures only file sealing requests to avoid backlog when slow. Suitable for most scenarios, balancing latency and throughput optimally.- false: ​​Fixed batch capture.​​ Captures only TsFile sealing requests. Suitable for resource-constrained scenarios to reduce system load. Note: The snapshot data captured upon pipe startup is only provided to downstream processing in file format. | Boolean: true / false | 否 | true | +| mode.strict | Determines the strictness when filtering data using time/ path/ database-name/ table-nameparameters:- true: ​​Strict filtering.​​ The system strictly filters captured data according to the given conditions, ensuring only matching data is selected.- false: ​​Non-strict filtering.​​ The system may include some extra data during filtering. Suitable for performance-sensitive scenarios to reduce CPU and I/O consumption. | Boolean: true / false | Optional | true | +| mode.snapshot | Determines the capture mode for time-series data, affecting the dataspecified in inclusion. Offers two modes:- true: ​​Static data capture.​​ Upon pipe startup, a one-time data snapshot is captured. ​​The pipe will automatically terminate (DROP PIPE SQL is executed automatically) after the snapshot data is fully consumed.​​- false: ​​Dynamic data capture.​​ In addition to capturing a snapshot upon startup, the pipe continuously captures subsequent data changes. The pipe runs continuously to handle the dynamic data stream. 
| Boolean: true / false | Optional | false | +| path | Can be specified when the user connects with sql_dialectset to tree. For upgraded user pipes, the default sql_dialectis tree. This parameter determines the capture scope for time-series data, affecting the dataspecified in inclusion, as well as some sequence-related metadata. Data is selected into the streaming pipe if its tree model path matches the specified path. | String: IoTDB-standard tree path pattern, wildcards allowed | Optional | root.** | | start-time | The start event time for synchronizing all data, including start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MIN_VALUE | | end-time | The end event time for synchronizing all data, including end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | Optional | Long.MAX_VALUE | -| realtime.mode | The extraction mode for newly inserted data (after pipe creation) | String: stream, batch | Optional | stream | -| forwarding-pipe-requests | Whether to forward data written by other Pipes (usually data synchronization) | Boolean: true, false | Optional | true | -| history.loose-range | When transferring TsFile, whether to relax the range of historical data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| realtime.loose-range | When transferring TsFile, whether to relax the range of real-time data (before the creation of the pipe). "": Do not relax the range, select data strictly according to the set conditions. "time": Relax the time range to avoid splitting TsFile, which can improve synchronization efficiency. "path": Relax the path range to avoid splitting TsFile, which can improve synchronization efficiency. "time, path", "path, time", "all": Relax all ranges to avoid splitting TsFile, which can improve synchronization efficiency. | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | Optional |""| -| mods.enable | Whether to send the mods file of tsfile | Boolean: true / false | Optional | false | +| forwarding-pipe-requests | Whether to forward data written by other Pipes (usually data synchronization) | Boolean: true | Optional | true | -> 💎 **Explanation**:To maintain compatibility with lower versions, history.enable, history.start-time, history.end-time, realtime.enable can still be used, but they are not recommended in the new version. +> 💎 **Note:** The difference between the values of true and false for the data extraction mode `mode.streaming` > -> 💎 **Explanation: Differences between Stream and Batch Data Extraction Modes** -> - **stream (recommended)**: In this mode, tasks process and send data in real-time. It is characterized by high timeliness and low throughput. -> - **batch**: In this mode, tasks process and send data in batches (according to the underlying data files). It is characterized by low timeliness and high throughput. - +> - True (recommended): Under this value, the task will process and send the data in real-time. Its characteristics are high timeliness and low throughput. 
+> - False: Under this value, the task will process and send the data in batches (according to the underlying data files). Its characteristics are low timeliness and high throughput. ### 5.2 sink parameter -> In versions 1.3.3 and above, when only the sink is included, the additional "with sink" prefix is no longer required. - #### iotdb-thrift-sink - -| key | value | value Range | required or not | Default Value | -| :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Required | | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :----------------------------------------------------------- | :------- | :------------ | +| sink | iotdb-thrift-sink or iotdb-thrift-async-sink | String: iotdb-thrift-sink or iotdb-thrift-async-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. | Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. 
| String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | The maximum number of bytes allowed to be transmitted per second. The compressed bytes (such as after compression) are calculated. If it is less than 0, there is no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.<br>
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | +| format | The payload formats for data transmission include the following options:
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | #### iotdb-air-gap-sink @@ -611,16 +611,29 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | :--------------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | Required | - | | node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB | String. Example: :'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | Maximum bytes allowed per second for transmission (calculated after compression). Set to a value less than 0 for no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.<br>
​​async​​: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | | air-gap.handshake-timeout-ms | The timeout duration of the handshake request when the sender and receiver first attempt to establish a connection, unit: ms | Integer | Optional | 5000 | #### iotdb-thrift-ssl-sink -| key | value | value Range | required or not | Default Value | -| :---------------------- | :----------------------------------------------------------- | :----------------------------------------------------------- | :------- | :----------- | -| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Required | - | -| node-urls | The URL of the data service port of any DataNode nodes on the target IoTDB (please note that synchronization tasks do not support forwarding to its own service) | String. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Required | - | -| batch.enable | Whether to enable batched log transmission mode to improve transmission throughput and reduce IOPS | Boolean: true, false | Optional | true | -| batch.max-delay-seconds | Effective when batched log transmission mode is enabled, it represents the maximum waiting time for a batch of data before sending (unit: s) | Integer | Optional | 1 | -| batch.size-bytes | Effective when batched log transmission mode is enabled, it represents the maximum batch size for a batch of data (unit: byte) | Long | Optional | 16*1024*1024 | -| ssl.trust-store-path | The trust store certificate path required to connect to the target DataNode | String: certificate directory name, when configured as a relative directory, it is relative to the IoTDB root directory. Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667'| Required | - | -| ssl.trust-store-pwd | The trust store certificate password required to connect to the target DataNode | Integer | Required | - | +| **Parameter** | **Description** | Value Range | Required | Default Value | +|:----------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------| :------------ | +| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | Yes | - | +| node-urls | URLs of the DataNode service ports on the target IoTDB. (please note that the synchronization task does not support forwarding to its own service). | String. Example:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | Yes | - | +| user/username | Username for connecting to the target IoTDB. Must have appropriate permissions. | String | No | root | +| password | Password for the username. | String | No | root | +| batch.enable | Enables batch mode for log transmission to improve throughput and reduce IOPS. | Boolean: true, false | No | true | +| batch.max-delay-seconds | Maximum delay (in seconds) for batch transmission. | Integer | No | 1 | +| batch.size-bytes | Maximum batch size (in bytes) for batch transmission. 
| Long | No | 16*1024*1024 | +| compressor | The selected RPC compression algorithm. Multiple algorithms can be configured and will be adopted in sequence for each request. | String: snappy / gzip / lz4 / zstd / lzma2 | No | "" | +| compressor.zstd.level | When the selected RPC compression algorithm is zstd, this parameter can be used to additionally configure the compression level of the zstd algorithm. | Int: [-131072, 22] | No | 3 | +| rate-limit-bytes-per-second | Maximum bytes allowed per second for transmission (calculated after compression). Set to a value less than 0 for no limit. | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | No | -1 | +| load-tsfile-strategy | When synchronizing file data, ​​whether the receiver waits for the local load tsfile operation to complete before responding to the sender​​:
sync: Wait for the local load tsfile operation to complete before returning the response.<br>
async: Do not wait for the local load tsfile operation to complete; return the response immediately. | String: sync / async | No | sync | | ssl.trust-store-path | Path to the trust store certificate for SSL connection. | String: certificate path (a relative path is resolved against the IoTDB root directory) | Yes | - | | ssl.trust-store-pwd | Password for the trust store certificate. | Integer | Yes | - | | format | The payload formats for data transmission include the following options:<br>
- hybrid: The format depends on what is passed from the processor (either tsfile or tablet), and the sink performs no conversion.
- tsfile: Data is forcibly converted to tsfile format before transmission. This is suitable for scenarios like data file backup.
- tablet: Data is forcibly converted to tsfile format before transmission. This is useful for data synchronization when the sender and receiver have incompatible data types (to minimize errors). | String: hybrid / tsfile / tablet | No | hybrid | diff --git a/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md b/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md index 34fbaa961..f55e0cccb 100644 --- a/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md +++ b/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_apache.md @@ -525,7 +525,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------|----------|--------------| | sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | @@ -543,7 +543,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | diff --git a/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md b/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md index f814c76b1..bfcac2816 100644 --- a/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md +++ b/src/zh/UserGuide/Master/Table/User-Manual/Data-Sync_timecho.md @@ -578,7 +578,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------| -------- | ------------ | | sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 
例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | @@ -592,25 +592,25 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 #### iotdb-air-gap-sink -| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | -| ---------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- | +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|------------------------------| ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | | compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | | rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | -| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | | air-gap.handshake-timeout-ms | 发送端与接收端在首次尝试建立连接时握手请求的超时时长,单位:毫秒 | Integer | 选填 | 5000 | #### iotdb-thrift-ssl-sink -| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | -| --------------------------- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | diff --git a/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md b/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md index ef41e2e37..a12c39a04 100644 --- a/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md +++ b/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_apache.md @@ -476,54 +476,62 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 ## 5. 
参考:参数说明 -### 5.1 source 参数(V1.3.3) - -| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | -| ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------------- | -| source | iotdb-source | String: iotdb-source | 必填 | - | -| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | -| inclusion.exclusion | 用于从 inclusion 指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | -| mode | 用于在每个 data region 发送完毕时分别发送结束事件,并在全部 data region 发送完毕后自动 drop pipe。query:结束,subscribe:不结束。 | String: query / subscribe | 选填 | subscribe | -| path | 用于筛选待同步的时间序列及其相关元数据 / 数据的路径模式元数据同步只能用pathpath 是精确匹配,参数必须为前缀路径或完整路径,即不能含有 `"*"`,最多在 path参数的尾部含有一个 `"**"` | String:IoTDB 的 pattern | 选填 | root.** | -| pattern | 用于筛选时间序列的路径前缀 | String: 任意的时间序列前缀 | 选填 | root | -| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | -| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | -| realtime.mode | 新插入数据(pipe 创建后)的抽取模式 | String: batch | 选填 | batch | -| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true | 选填 | true | -| history.loose-range | tsfile传输时,是否放宽历史数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| realtime.loose-range | tsfile传输时,是否放宽实时数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| mods.enable | 是否发送 tsfile 的 mods 文件 | Boolean: true / false | 选填 | false | - -> 💎 **说明**:为保持低版本兼容,history.enable、history.start-time、history.end-time、realtime.enable 仍可使用,但在新版本中不推荐。 -> -> 💎 **说明:数据抽取模式 batch 的含义** -> - **batch**:该模式下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 - - -### 5.2 sink 参数 - -> 在 1.3.3 及以上的版本中,只包含sink的情况下,不再需要额外增加with sink 前缀 +### 5.1 source 参数 + +| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | +| ------------------------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------| -------- | -------------- | +| source | iotdb-source | String: iotdb-source | 必填 | - | +| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | +| inclusion.exclusion | 用于从 inclusion 指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | +| mode.streaming | 此参数指定时序数据写入的捕获来源。适用于 `mode.streaming`为 `false` 模式下的场景,决定`inclusion`中`data.insert`数据的捕获来源。提供两种捕获策略:true: 动态选择捕获的类型。系统将根据下游处理速度,自适应地选择是捕获每个写入请求还是仅捕获 TsFile 文件的封口请求。当下游处理速度快时,优先捕获写入请求以减少延迟;当处理速度慢时,仅捕获文件封口请求以避免处理堆积。这种模式适用于大多数场景,能够实现处理延迟和吞吐量的最优平衡。false:固定按批捕获方式。仅捕获 TsFile 文件的封口请求,适用于资源受限的应用场景,以降低系统负载。注意,pipe 启动时捕获的快照数据只会以文件的方式供下游处理。 | Boolean: true / false | 
否 | true | +| mode.strict | 在使用 time / path / database-name / table-name 参数过滤数据时,是否需要严格按照条件筛选:`true`: 严格筛选。系统将完全按照给定条件过滤筛选被捕获的数据,确保只有符合条件的数据被选中。`false`:非严格筛选。系统在筛选被捕获的数据时可能会包含一些额外的数据,适用于性能敏感的场景,可降低 CPU 和 IO 消耗。 | Boolean: true / false | 否 | true | +| mode.snapshot | 此参数决定时序数据的捕获方式,影响`inclusion`中的`data`数据。提供两种模式:`true`:静态数据捕获。启动 pipe 时,会进行一次性的数据快照捕获。当快照数据被完全消费后,**pipe 将自动终止(DROP PIPE SQL 会自动执行)**。`false`:动态数据捕获。除了在 pipe 启动时捕获快照数据外,还会持续捕获后续的数据变更。pipe 将持续运行以处理动态数据流。 | Boolean: true / false | 否 | false | +| path | 当用户连接指定的sql_dialect为tree时可以指定。对于升级上来的用户pipe,默认sql_dialect为tree。此参数决定时序数据的捕获范围,影响 inclusion中的data数据,以及部分序列相关的元数据。当数据的树模型路径能够被path匹配时,数据会被筛选出来进入流处理pipe。 | String:IoTDB标准的树路径模式,可以带通配符 | 选填 | root.** | +| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | +| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | +| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true, false | 选填 | true | + +> 💎 **说明:数据抽取模式 mode.streaming 取值 true 和 false 的差异** +> - **true(推荐)**:该取值下,任务将对数据进行实时处理、发送,其特点是高时效、低吞吐 +> - **false**:该取值下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 + + +### 5.2 sink 参数 #### iotdb-thrift-sink -| key | value | value 取值范围 | 是否必填 | 默认取值 | -| ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | -| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | -| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | -| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | -| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------|----------|--------------| +| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | +| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | +| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | +| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tsfile发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid | -#### iotdb-thrift-ssl-sink -| key | value | value 取值范围 | 是否必填 | 默认取值 | -| ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | -| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | -| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | -| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | -| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | -| ssl.trust-store-path | 连接目标端 DataNode 所需的 trust store 证书路径 | String.Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| ssl.trust-store-pwd | 连接目标端 DataNode 所需的 trust store 证书密码 | Integer | 必填 | - | +#### iotdb-thrift-ssl-sink +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | +| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | +| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | +| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | +| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| ssl.trust-store-path | 连接目标端 DataNode 所需的 trust store 证书路径 | String.Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| ssl.trust-store-pwd | 连接目标端 DataNode 所需的 trust store 证书密码 | Integer | 必填 | - | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tsfile发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid | diff --git a/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md b/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md index 66e4a73d9..1c30b63db 100644 --- a/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md +++ b/src/zh/UserGuide/Master/Tree/User-Manual/Data-Sync_timecho.md @@ -561,44 +561,44 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 ## 5. 参考:参数说明 -### 5.1 source 参数(V1.3.3) - -| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | -| ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------------- | -| source | iotdb-source | String: iotdb-source | 必填 | - | -| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | -| inclusion.exclusion | 用于从 inclusion 指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | -| mode | 用于在每个 data region 发送完毕时分别发送结束事件,并在全部 data region 发送完毕后自动 drop pipe。query:结束,subscribe:不结束。 | String: query / subscribe | 选填 | subscribe | -| path | 用于筛选待同步的时间序列及其相关元数据 / 数据的路径模式元数据同步只能用pathpath 是精确匹配,参数必须为前缀路径或完整路径,即不能含有 `"*"`,最多在 path参数的尾部含有一个 `"**"` | String:IoTDB 的 pattern | 选填 | root.** | -| pattern | 用于筛选时间序列的路径前缀 | String: 任意的时间序列前缀 | 选填 | root | -| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | -| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | -| realtime.mode | 新插入数据(pipe创建后)的抽取模式 | String: stream, batch | 选填 | batch | -| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true, false | 选填 | true | -| history.loose-range | tsfile传输时,是否放宽历史数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| realtime.loose-range | tsfile传输时,是否放宽实时数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| mods.enable | 是否发送 tsfile 的 mods 文件 | Boolean: true / false | 选填 | false | - -> 💎 **说明**:为保持低版本兼容,history.enable、history.start-time、history.end-time、realtime.enable 仍可使用,但在新版本中不推荐。 -> -> 💎 **说明:数据抽取模式 stream 和 batch 的差异** -> - **stream(推荐)**:该模式下,任务将对数据进行实时处理、发送,其特点是高时效、低吞吐 -> - **batch**:该模式下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 +### 5.1 source 参数 + +| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | +| ------------------------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------| -------- | -------------- | +| source | iotdb-source | String: iotdb-source | 必填 | - | +| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | +| inclusion.exclusion | 用于从 
inclusion 指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | +| mode.streaming | 此参数指定时序数据写入的捕获来源。适用于 `mode.streaming`为 `false` 模式下的场景,决定`inclusion`中`data.insert`数据的捕获来源。提供两种捕获策略:true: 动态选择捕获的类型。系统将根据下游处理速度,自适应地选择是捕获每个写入请求还是仅捕获 TsFile 文件的封口请求。当下游处理速度快时,优先捕获写入请求以减少延迟;当处理速度慢时,仅捕获文件封口请求以避免处理堆积。这种模式适用于大多数场景,能够实现处理延迟和吞吐量的最优平衡。false:固定按批捕获方式。仅捕获 TsFile 文件的封口请求,适用于资源受限的应用场景,以降低系统负载。注意,pipe 启动时捕获的快照数据只会以文件的方式供下游处理。 | Boolean: true / false | 否 | true | +| mode.strict | 在使用 time / path / database-name / table-name 参数过滤数据时,是否需要严格按照条件筛选:`true`: 严格筛选。系统将完全按照给定条件过滤筛选被捕获的数据,确保只有符合条件的数据被选中。`false`:非严格筛选。系统在筛选被捕获的数据时可能会包含一些额外的数据,适用于性能敏感的场景,可降低 CPU 和 IO 消耗。 | Boolean: true / false | 否 | true | +| mode.snapshot | 此参数决定时序数据的捕获方式,影响`inclusion`中的`data`数据。提供两种模式:`true`:静态数据捕获。启动 pipe 时,会进行一次性的数据快照捕获。当快照数据被完全消费后,**pipe 将自动终止(DROP PIPE SQL 会自动执行)**。`false`:动态数据捕获。除了在 pipe 启动时捕获快照数据外,还会持续捕获后续的数据变更。pipe 将持续运行以处理动态数据流。 | Boolean: true / false | 否 | false | +| path | 当用户连接指定的sql_dialect为tree时可以指定。对于升级上来的用户pipe,默认sql_dialect为tree。此参数决定时序数据的捕获范围,影响 inclusion中的data数据,以及部分序列相关的元数据。当数据的树模型路径能够被path匹配时,数据会被筛选出来进入流处理pipe。 | String:IoTDB标准的树路径模式,可以带通配符 | 选填 | root.** | +| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | +| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | +| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true, false | 选填 | true | + +> 💎 **说明:数据抽取模式 mode.streaming 取值 true 和 false 的差异** +> - **true(推荐)**:该取值下,任务将对数据进行实时处理、发送,其特点是高时效、低吞吐 +> - **false**:该取值下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 ### 5.2 sink **参数** -> 在 1.3.3 及以上的版本中,只包含sink的情况下,不再需要额外增加with sink 前缀 - #### iotdb-thrift-sink -| key | value | value 取值范围 | 是否必填 | 默认取值 | -| ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | -| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | -| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | -| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | -| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| key | value | value 取值范围 | 是否必填 | 默认取值 | +|-----------------------------| ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | +| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | +| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 
例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | +| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | +| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tsfile发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid | #### iotdb-air-gap-sink @@ -606,6 +606,12 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | ---------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | | air-gap.handshake-timeout-ms | 发送端与接收端在首次尝试建立连接时握手请求的超时时长,单位:毫秒 | Integer | 选填 | 5000 | #### iotdb-thrift-ssl-sink @@ -614,8 +620,15 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | | batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | | ssl.trust-store-path | 连接目标端 DataNode 所需的 trust store 证书路径 | String.Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | | ssl.trust-store-pwd | 连接目标端 DataNode 所需的 trust store 证书密码 | Integer | 必填 | - | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tsfile发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid | diff --git a/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md b/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md index 34fbaa961..f55e0cccb 100644 --- a/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md +++ b/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_apache.md @@ -525,7 +525,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------|----------|--------------| | sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | @@ -543,7 +543,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | diff --git a/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md b/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md index f814c76b1..bfcac2816 100644 --- a/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md +++ b/src/zh/UserGuide/latest-Table/User-Manual/Data-Sync_timecho.md @@ -578,7 +578,7 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 |-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------| -------- | ------------ | | sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 
例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | @@ -592,25 +592,25 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 #### iotdb-air-gap-sink -| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | -| ---------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- | +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|------------------------------| ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | | compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | | rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | -| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | | air-gap.handshake-timeout-ms | 发送端与接收端在首次尝试建立连接时握手请求的超时时长,单位:毫秒 | Integer | 选填 | 5000 | #### iotdb-thrift-ssl-sink -| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | -| --------------------------- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| user/usename | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | | password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | diff --git a/src/zh/UserGuide/latest/User-Manual/Data-Sync_apache.md b/src/zh/UserGuide/latest/User-Manual/Data-Sync_apache.md index ef41e2e37..a12c39a04 100644 --- a/src/zh/UserGuide/latest/User-Manual/Data-Sync_apache.md +++ b/src/zh/UserGuide/latest/User-Manual/Data-Sync_apache.md @@ -476,54 +476,62 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 ## 5. 
参考:参数说明 -### 5.1 source 参数(V1.3.3) - -| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | -| ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------------- | -| source | iotdb-source | String: iotdb-source | 必填 | - | -| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | -| inclusion.exclusion | 用于从 inclusion 指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | -| mode | 用于在每个 data region 发送完毕时分别发送结束事件,并在全部 data region 发送完毕后自动 drop pipe。query:结束,subscribe:不结束。 | String: query / subscribe | 选填 | subscribe | -| path | 用于筛选待同步的时间序列及其相关元数据 / 数据的路径模式元数据同步只能用pathpath 是精确匹配,参数必须为前缀路径或完整路径,即不能含有 `"*"`,最多在 path参数的尾部含有一个 `"**"` | String:IoTDB 的 pattern | 选填 | root.** | -| pattern | 用于筛选时间序列的路径前缀 | String: 任意的时间序列前缀 | 选填 | root | -| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | -| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | -| realtime.mode | 新插入数据(pipe 创建后)的抽取模式 | String: batch | 选填 | batch | -| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true | 选填 | true | -| history.loose-range | tsfile传输时,是否放宽历史数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| realtime.loose-range | tsfile传输时,是否放宽实时数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| mods.enable | 是否发送 tsfile 的 mods 文件 | Boolean: true / false | 选填 | false | - -> 💎 **说明**:为保持低版本兼容,history.enable、history.start-time、history.end-time、realtime.enable 仍可使用,但在新版本中不推荐。 -> -> 💎 **说明:数据抽取模式 batch 的含义** -> - **batch**:该模式下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 - - -### 5.2 sink 参数 - -> 在 1.3.3 及以上的版本中,只包含sink的情况下,不再需要额外增加with sink 前缀 +### 5.1 source 参数 + +| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | +| ------------------------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------| -------- | -------------- | +| source | iotdb-source | String: iotdb-source | 必填 | - | +| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | +| inclusion.exclusion | 用于从 inclusion 指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | +| mode.streaming | 此参数指定时序数据写入的捕获来源。适用于 `mode.streaming`为 `false` 模式下的场景,决定`inclusion`中`data.insert`数据的捕获来源。提供两种捕获策略:true: 动态选择捕获的类型。系统将根据下游处理速度,自适应地选择是捕获每个写入请求还是仅捕获 TsFile 文件的封口请求。当下游处理速度快时,优先捕获写入请求以减少延迟;当处理速度慢时,仅捕获文件封口请求以避免处理堆积。这种模式适用于大多数场景,能够实现处理延迟和吞吐量的最优平衡。false:固定按批捕获方式。仅捕获 TsFile 文件的封口请求,适用于资源受限的应用场景,以降低系统负载。注意,pipe 启动时捕获的快照数据只会以文件的方式供下游处理。 | Boolean: true / false | 
否 | true | +| mode.strict | 在使用 time / path / database-name / table-name 参数过滤数据时,是否需要严格按照条件筛选:`true`: 严格筛选。系统将完全按照给定条件过滤筛选被捕获的数据,确保只有符合条件的数据被选中。`false`:非严格筛选。系统在筛选被捕获的数据时可能会包含一些额外的数据,适用于性能敏感的场景,可降低 CPU 和 IO 消耗。 | Boolean: true / false | 否 | true | +| mode.snapshot | 此参数决定时序数据的捕获方式,影响`inclusion`中的`data`数据。提供两种模式:`true`:静态数据捕获。启动 pipe 时,会进行一次性的数据快照捕获。当快照数据被完全消费后,**pipe 将自动终止(DROP PIPE SQL 会自动执行)**。`false`:动态数据捕获。除了在 pipe 启动时捕获快照数据外,还会持续捕获后续的数据变更。pipe 将持续运行以处理动态数据流。 | Boolean: true / false | 否 | false | +| path | 当用户连接指定的sql_dialect为tree时可以指定。对于升级上来的用户pipe,默认sql_dialect为tree。此参数决定时序数据的捕获范围,影响 inclusion中的data数据,以及部分序列相关的元数据。当数据的树模型路径能够被path匹配时,数据会被筛选出来进入流处理pipe。 | String:IoTDB标准的树路径模式,可以带通配符 | 选填 | root.** | +| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | +| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | +| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true, false | 选填 | true | + +> 💎 **说明:数据抽取模式 mode.streaming 取值 true 和 false 的差异** +> - **true(推荐)**:该取值下,任务将对数据进行实时处理、发送,其特点是高时效、低吞吐 +> - **false**:该取值下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 + + +### 5.2 sink 参数 #### iotdb-thrift-sink -| key | value | value 取值范围 | 是否必填 | 默认取值 | -| ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | -| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | -| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | -| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | -| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------|----------|--------------| +| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | +| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | +| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | +| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tsfile发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid | -#### iotdb-thrift-ssl-sink -| key | value | value 取值范围 | 是否必填 | 默认取值 | -| ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | -| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | -| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | -| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | -| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | -| ssl.trust-store-path | 连接目标端 DataNode 所需的 trust store 证书路径 | String.Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| ssl.trust-store-pwd | 连接目标端 DataNode 所需的 trust store 证书密码 | Integer | 必填 | - | +#### iotdb-thrift-ssl-sink +| **参数** | **描述** | **value 取值范围** | **是否必填** | **默认取值** | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -------- | ------------ | +| sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | +| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | +| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | +| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| ssl.trust-store-path | 连接目标端 DataNode 所需的 trust store 证书路径 | String.Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| ssl.trust-store-pwd | 连接目标端 DataNode 所需的 trust store 证书密码 | Integer | 必填 | - | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tsfile发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid | diff --git a/src/zh/UserGuide/latest/User-Manual/Data-Sync_timecho.md b/src/zh/UserGuide/latest/User-Manual/Data-Sync_timecho.md index 66e4a73d9..1c30b63db 100644 --- a/src/zh/UserGuide/latest/User-Manual/Data-Sync_timecho.md +++ b/src/zh/UserGuide/latest/User-Manual/Data-Sync_timecho.md @@ -561,44 +561,44 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 ## 5. 参考:参数说明 -### 5.1 source 参数(V1.3.3) - -| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | -| ------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------------- | -| source | iotdb-source | String: iotdb-source | 必填 | - | -| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | -| inclusion.exclusion | 用于从 inclusion 指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | -| mode | 用于在每个 data region 发送完毕时分别发送结束事件,并在全部 data region 发送完毕后自动 drop pipe。query:结束,subscribe:不结束。 | String: query / subscribe | 选填 | subscribe | -| path | 用于筛选待同步的时间序列及其相关元数据 / 数据的路径模式元数据同步只能用pathpath 是精确匹配,参数必须为前缀路径或完整路径,即不能含有 `"*"`,最多在 path参数的尾部含有一个 `"**"` | String:IoTDB 的 pattern | 选填 | root.** | -| pattern | 用于筛选时间序列的路径前缀 | String: 任意的时间序列前缀 | 选填 | root | -| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | -| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | -| realtime.mode | 新插入数据(pipe创建后)的抽取模式 | String: stream, batch | 选填 | batch | -| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true, false | 选填 | true | -| history.loose-range | tsfile传输时,是否放宽历史数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| realtime.loose-range | tsfile传输时,是否放宽实时数据(pipe创建前)范围。"":不放宽范围,严格按照设置的条件挑选数据"time":放宽时间范围,避免对TsFile进行拆分,可以提升同步效率"path":放宽路径范围,避免对TsFile进行拆分,可以提升同步效率"time, path" 、 "path, time" 、"all" : 放宽所有范围,避免对TsFile进行拆分,可以提升同步效率 | String: "" 、 "time" 、 "path" 、 "time, path" 、 "path, time" 、 "all" | 选填 | "" | -| mods.enable | 是否发送 tsfile 的 mods 文件 | Boolean: true / false | 选填 | false | - -> 💎 **说明**:为保持低版本兼容,history.enable、history.start-time、history.end-time、realtime.enable 仍可使用,但在新版本中不推荐。 -> -> 💎 **说明:数据抽取模式 stream 和 batch 的差异** -> - **stream(推荐)**:该模式下,任务将对数据进行实时处理、发送,其特点是高时效、低吞吐 -> - **batch**:该模式下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 +### 5.1 source 参数 + +| 参数 | 描述 | value 取值范围 | 是否必填 | 默认取值 | +| ------------------------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------| -------- | -------------- | +| source | iotdb-source | String: iotdb-source | 必填 | - | +| inclusion | 用于指定数据同步任务中需要同步范围,分为数据、元数据和权限 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | data.insert | +| inclusion.exclusion | 用于从 inclusion 
指定的同步范围内排除特定的操作,减少同步的数据量 | String:all, data(insert,delete), schema(database,timeseries,ttl), auth | 选填 | 空字符串 | +| mode.streaming | 此参数指定时序数据写入的捕获来源。适用于 `mode.streaming`为 `false` 模式下的场景,决定`inclusion`中`data.insert`数据的捕获来源。提供两种捕获策略:true: 动态选择捕获的类型。系统将根据下游处理速度,自适应地选择是捕获每个写入请求还是仅捕获 TsFile 文件的封口请求。当下游处理速度快时,优先捕获写入请求以减少延迟;当处理速度慢时,仅捕获文件封口请求以避免处理堆积。这种模式适用于大多数场景,能够实现处理延迟和吞吐量的最优平衡。false:固定按批捕获方式。仅捕获 TsFile 文件的封口请求,适用于资源受限的应用场景,以降低系统负载。注意,pipe 启动时捕获的快照数据只会以文件的方式供下游处理。 | Boolean: true / false | 否 | true | +| mode.strict | 在使用 time / path / database-name / table-name 参数过滤数据时,是否需要严格按照条件筛选:`true`: 严格筛选。系统将完全按照给定条件过滤筛选被捕获的数据,确保只有符合条件的数据被选中。`false`:非严格筛选。系统在筛选被捕获的数据时可能会包含一些额外的数据,适用于性能敏感的场景,可降低 CPU 和 IO 消耗。 | Boolean: true / false | 否 | true | +| mode.snapshot | 此参数决定时序数据的捕获方式,影响`inclusion`中的`data`数据。提供两种模式:`true`:静态数据捕获。启动 pipe 时,会进行一次性的数据快照捕获。当快照数据被完全消费后,**pipe 将自动终止(DROP PIPE SQL 会自动执行)**。`false`:动态数据捕获。除了在 pipe 启动时捕获快照数据外,还会持续捕获后续的数据变更。pipe 将持续运行以处理动态数据流。 | Boolean: true / false | 否 | false | +| path | 当用户连接指定的sql_dialect为tree时可以指定。对于升级上来的用户pipe,默认sql_dialect为tree。此参数决定时序数据的捕获范围,影响 inclusion中的data数据,以及部分序列相关的元数据。当数据的树模型路径能够被path匹配时,数据会被筛选出来进入流处理pipe。 | String:IoTDB标准的树路径模式,可以带通配符 | 选填 | root.** | +| start-time | 同步所有数据的开始 event time,包含 start-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MIN_VALUE | +| end-time | 同步所有数据的结束 event time,包含 end-time | Long: [Long.MIN_VALUE, Long.MAX_VALUE] | 选填 | Long.MAX_VALUE | +| forwarding-pipe-requests | 是否转发由其他 Pipe (通常是数据同步)写入的数据 | Boolean: true, false | 选填 | true | + +> 💎 **说明:数据抽取模式 mode.streaming 取值 true 和 false 的差异** +> - **true(推荐)**:该取值下,任务将对数据进行实时处理、发送,其特点是高时效、低吞吐 +> - **false**:该取值下,任务将对数据进行批量(按底层数据文件)处理、发送,其特点是低时效、高吞吐 ### 5.2 sink **参数** -> 在 1.3.3 及以上的版本中,只包含sink的情况下,不再需要额外增加with sink 前缀 - #### iotdb-thrift-sink -| key | value | value 取值范围 | 是否必填 | 默认取值 | -| ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | -| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | -| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | -| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | -| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | -| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| key | value | value 取值范围 | 是否必填 | 默认取值 | +|-----------------------------| ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | +| sink | iotdb-thrift-sink 或 iotdb-thrift-async-sink | String: iotdb-thrift-sink 或 iotdb-thrift-async-sink | 必填 | - | +| node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 
例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | +| batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | +| batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tsfile发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid | #### iotdb-air-gap-sink @@ -606,6 +606,12 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | ---------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- | | sink | iotdb-air-gap-sink | String: iotdb-air-gap-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | | air-gap.handshake-timeout-ms | 发送端与接收端在首次尝试建立连接时握手请求的超时时长,单位:毫秒 | Integer | 选填 | 5000 | #### iotdb-thrift-ssl-sink @@ -614,8 +620,15 @@ pipe_all_sinks_rate_limit_bytes_per_second=-1 | ----------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | ------------ | | sink | iotdb-thrift-ssl-sink | String: iotdb-thrift-ssl-sink | 必填 | - | | node-urls | 目标端 IoTDB 任意多个 DataNode 节点的数据服务端口的 url(请注意同步任务不支持向自身服务进行转发) | String. 例:'127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | +| user/username | 连接接收端使用的用户名,同步要求该用户具备相应的操作权限 | String | 选填 | root | +| password | 连接接收端使用的用户名对应的密码,同步要求该用户具备相应的操作权限 | String | 选填 | root | | batch.enable | 是否开启日志攒批发送模式,用于提高传输吞吐,降低 IOPS | Boolean: true, false | 选填 | true | | batch.max-delay-seconds | 在开启日志攒批发送模式时生效,表示一批数据在发送前的最长等待时间(单位:s) | Integer | 选填 | 1 | | batch.size-bytes | 在开启日志攒批发送模式时生效,表示一批数据最大的攒批大小(单位:byte) | Long | 选填 | 16*1024*1024 | +| compressor | 所选取的 rpc 压缩算法,可配置多个,对每个请求顺序采用 | String: snappy / gzip / lz4 / zstd / lzma2 | 选填 | "" | +| compressor.zstd.level | 所选取的 rpc 压缩算法为 zstd 时,可使用该参数额外配置 zstd 算法的压缩等级 | Int: [-131072, 22] | 选填 | 3 | +| rate-limit-bytes-per-second | 每秒最大允许传输的 byte 数,计算压缩后的 byte(如压缩),若小于 0 则不限制 | Double: [Double.MIN_VALUE, Double.MAX_VALUE] | 选填 | -1 | +| load-tsfile-strategy | 文件同步数据时,接收端请求返回发送端前,是否等待接收端本地的 load tsfile 执行结果返回。
sync:等待本地的 load tsfile 执行结果返回;
async:不等待本地的 load tsfile 执行结果返回。 | String: sync / async | 选填 | sync | | ssl.trust-store-path | 连接目标端 DataNode 所需的 trust store 证书路径 | String.Example: '127.0.0.1:6667,127.0.0.1:6668,127.0.0.1:6669', '127.0.0.1:6667' | 必填 | - | | ssl.trust-store-pwd | 连接目标端 DataNode 所需的 trust store 证书密码 | Integer | 必填 | - | +| format | 数据传输的payload格式, 可选项包括:
- hybrid: 取决于 processor 传递过来的格式(tsfile或tablet),sink不做任何转换。
- tsfile:强制转换成tsfile发送,可用于数据文件备份等场景。
- tablet:强制转换成tablet发送,可用于发送端/接收端数据类型不完全兼容时的数据同步(以减少报错)。 | String: hybrid / tsfile / tablet | 选填 | hybrid |
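
For reference, a minimal sketch of how the `source` and `sink` keys documented in the tables above combine in a single `CREATE PIPE` statement on a tree-model sender. The pipe name, target address, credentials, trust-store values, and the `root.db.**` path are illustrative placeholders, not values taken from this patch; keys listed in the tables but omitted here fall back to their documented defaults (e.g. `batch.enable=true`, `rate-limit-bytes-per-second=-1`).

```sql
CREATE PIPE example_ssl_sync
WITH SOURCE (
  'source'          = 'iotdb-source',
  'inclusion'       = 'data.insert',
  'mode.streaming'  = 'true',
  'mode.strict'     = 'true',
  'path'            = 'root.db.**'
)
WITH SINK (
  'sink'                  = 'iotdb-thrift-ssl-sink',
  'node-urls'             = '192.168.0.2:6667',
  'username'              = 'root',
  'password'              = 'root',
  'compressor'            = 'zstd',
  'compressor.zstd.level' = '3',
  'load-tsfile-strategy'  = 'sync',
  'format'                = 'hybrid',
  'ssl.trust-store-path'  = '/path/to/trust-store.jks',
  'ssl.trust-store-pwd'   = 'trust_store_password'
)
```

For an air-gapped link the same pattern applies with `'sink' = 'iotdb-air-gap-sink'` (plus `air-gap.handshake-timeout-ms` if the default of 5000 ms is not suitable), and for a plain Thrift connection with `'sink' = 'iotdb-thrift-sink'` without the `ssl.*` keys.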