From 27d5815ab0d43635ce4bfd43f209fd219c584e2b Mon Sep 17 00:00:00 2001 From: Yuri Smirnov Date: Fri, 24 Apr 2026 13:47:01 +0300 Subject: [PATCH 1/7] Add clickhouse-native backend support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UmbrellioUtils::ClickHouse becomes a polymorphic facade with two adapters: Backends::Legacy (existing `click_house` HTTP gem) and Backends::Native (new `clickhouse-native` TCP gem). Consumers pick the backend via `UmbrellioUtils.config.clickhouse_backend` — default stays `:legacy` so existing users are unaffected. Shared logic (from, count, DDL helpers, parse_value, temp-table plumbing) lives on Backends::Base; each adapter provides the low-level ops (execute/query/insert/describe_table/tables/server_version/ admin_execute) and a SERVER_ERROR class. DDL that creates/drops the configured database routes through admin_execute so the native adapter can open a one-shot client against the always-present `default` db (the native TCP protocol needs a reachable db, unlike HTTP). Native consumers also get a `::ClickHouse.config` shim (loaded eagerly from `configure` when `:native` is set) so external code that reads `::ClickHouse.config` directly — e.g. umbrellio-sequel-plugins' ch:create rake task — keeps working without the old click_house gem. ch:connect rake task is now backend-agnostic via UmbrellioUtils::ClickHouse.config. Version bump 1.12.1 → 1.13.0. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Gemfile | 4 + Gemfile.lock | 24 ++- lib/umbrellio_utils.rb | 15 +- lib/umbrellio_utils/click_house.rb | 194 +++--------------- lib/umbrellio_utils/click_house/backends.rb | 12 ++ .../click_house/backends/base.rb | 164 +++++++++++++++ .../click_house/backends/legacy.rb | 92 +++++++++ .../click_house/backends/native.rb | 110 ++++++++++ lib/umbrellio_utils/click_house/config.rb | 13 ++ .../tasks/clickhouse_connect.rake | 11 +- lib/umbrellio_utils/version.rb | 2 +- .../click_house/backend_dispatch_spec.rb | 50 +++++ spec/umbrellio_utils_spec.rb | 2 + 13 files changed, 517 insertions(+), 176 deletions(-) create mode 100644 lib/umbrellio_utils/click_house/backends.rb create mode 100644 lib/umbrellio_utils/click_house/backends/base.rb create mode 100644 lib/umbrellio_utils/click_house/backends/legacy.rb create mode 100644 lib/umbrellio_utils/click_house/backends/native.rb create mode 100644 lib/umbrellio_utils/click_house/config.rb create mode 100644 spec/umbrellio_utils/click_house/backend_dispatch_spec.rb diff --git a/Gemfile b/Gemfile index 59ad9ef..9a00535 100644 --- a/Gemfile +++ b/Gemfile @@ -9,6 +9,10 @@ gem "activesupport" gem "bundler" gem "ci-helper" gem "click_house", github: "umbrellio/click_house", branch: "master" +# clickhouse-native requires Ruby >= 3.3; gate so 3.1/3.2 CI still bundles. +install_if -> { Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.3.0") } do + gem "clickhouse-native" +end gem "csv" gem "http" gem "net-pop" diff --git a/Gemfile.lock b/Gemfile.lock index 7f65cde..5941539 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -10,7 +10,7 @@ GIT PATH remote: . 
specs: - umbrellio-utils (1.12.1) + umbrellio-utils (1.13.0) memery (~> 1) GEM @@ -104,6 +104,16 @@ GEM colorize (~> 1.1) dry-inflector (~> 1.0) umbrellio-sequel-plugins (~> 0.14) + clickhouse-native (0.5.0) + connection_pool (>= 2.4) + clickhouse-native (0.5.0-aarch64-linux-gnu) + connection_pool (>= 2.4) + clickhouse-native (0.5.0-arm64-darwin) + connection_pool (>= 2.4) + clickhouse-native (0.5.0-x86_64-darwin) + connection_pool (>= 2.4) + clickhouse-native (0.5.0-x86_64-linux-gnu) + connection_pool (>= 2.4) coderay (1.1.3) colorize (1.1.0) concurrent-ruby (1.3.5) @@ -125,6 +135,7 @@ GEM logger faraday-net_http (3.4.2) net-http (~> 0.5) + ffi (1.17.2) ffi (1.17.2-aarch64-linux-gnu) ffi (1.17.2-aarch64-linux-musl) ffi (1.17.2-arm-linux-gnu) @@ -178,7 +189,10 @@ GEM memery (1.7.0) method_source (1.1.0) mini_mime (1.1.5) - minitest (5.25.5) + mini_portile2 (2.8.9) + minitest (6.0.5) + drb (~> 2.0) + prism (~> 1.5) net-http (0.8.0) uri (>= 0.11.1) net-imap (0.5.10) @@ -191,6 +205,9 @@ GEM net-smtp (0.5.1) net-protocol nio4r (2.7.4) + nokogiri (1.18.10) + mini_portile2 (~> 2.8.2) + racc (~> 1.4) nokogiri (1.18.10-aarch64-linux-gnu) racc (~> 1.4) nokogiri (1.18.10-aarch64-linux-musl) @@ -381,7 +398,7 @@ GEM sequel unicode-display_width (3.2.0) unicode-emoji (~> 4.1) - unicode-emoji (4.1.0) + unicode-emoji (4.2.0) uri (1.1.1) useragent (0.16.11) websocket-driver (0.8.0) @@ -408,6 +425,7 @@ DEPENDENCIES bundler ci-helper click_house! 
+ clickhouse-native csv http net-pop diff --git a/lib/umbrellio_utils.rb b/lib/umbrellio_utils.rb index d311199..8f14ae9 100644 --- a/lib/umbrellio_utils.rb +++ b/lib/umbrellio_utils.rb @@ -16,7 +16,14 @@ def included(othermod) def config synchronize do @@config ||= Struct - .new(:store_table_name, :http_client_name, :ch_optimize_timeout, keyword_init: true) + .new( + :store_table_name, + :http_client_name, + :ch_optimize_timeout, + :clickhouse_backend, + :clickhouse_native_settings, + keyword_init: true, + ) .new(**default_settings) end end @@ -25,6 +32,10 @@ def config def configure synchronize { yield config } + # Consumers on the native backend rely on `::ClickHouse.config` + # being available at rake/runtime (e.g. umbrellio-sequel-plugins' + # ch:create). Load the shim eagerly once the backend is selected. + require_relative "umbrellio_utils/click_house/config" if config.clickhouse_backend == :native end def extend_util!(module_name, &block) @@ -39,6 +50,8 @@ def default_settings store_table_name: :store, http_client_name: :application_httpclient, ch_optimize_timeout: 5.minutes, + clickhouse_backend: :legacy, + clickhouse_native_settings: {}, } end diff --git a/lib/umbrellio_utils/click_house.rb b/lib/umbrellio_utils/click_house.rb index a70e7f4..57c7666 100644 --- a/lib/umbrellio_utils/click_house.rb +++ b/lib/umbrellio_utils/click_house.rb @@ -1,188 +1,50 @@ # frozen_string_literal: true module UmbrellioUtils + # Polymorphic ClickHouse facade. The active backend is picked up from + # `UmbrellioUtils.config.clickhouse_backend` — `:legacy` routes through + # the `click_house` gem (HTTP), `:native` through the `clickhouse-native` + # gem (TCP). Both backends expose the same public surface so consumer + # code (including UmbrellioUtils::Migrations) is backend-agnostic. 
module ClickHouse - include Memery - extend self - delegate :create_database, :drop_database, :tables, :config, to: :client - - def insert(table_name, db_name: self.db_name, rows: []) - client.insert(full_table_name(table_name, db_name), rows, format: "JSONEachRow") - end - - def from(source, db_name: self.db_name) - ds = - case source - when Symbol - DB.from(db_name == self.db_name ? SQL[source] : SQL[db_name][source]) - when nil - DB.dataset - else - DB.from(source) - end - - ds.clone(ch: true) - end - - def execute(sql, host: nil, **opts) - log_errors(sql) do - client(host).execute(sql, params: opts) - end - end - - def query(dataset, host: nil, **opts) - sql = sql_for(dataset) - - log_errors(sql) do - select_all(sql, host:, **opts).map { |x| Misc::StrictHash[x.symbolize_keys] } - end - end - - def query_value(dataset, host: nil, **opts) - sql = sql_for(dataset) - - log_errors(sql) do - select_value(sql, host:, **opts) - end - end - - def count(dataset) - query_value(dataset.select(SQL.ch_count)) - end - - def optimize_table!(table_name, db_name: self.db_name) - Timeout.timeout(UmbrellioUtils.config.ch_optimize_timeout) do - execute("OPTIMIZE TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster FINAL") - end - end - - def truncate_table!(table_name, db_name: self.db_name) - execute("TRUNCATE TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster SYNC") - end - - def drop_table!(table_name, db_name: self.db_name) - execute("DROP TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster SYNC") - end - - def describe_table(table_name, db_name: self.db_name) - sql = "DESCRIBE TABLE #{full_table_name(table_name, db_name)} FORMAT JSON" + autoload :Backends, "umbrellio_utils/click_house/backends" - log_errors(sql) do - select_all(sql).map { |x| Misc::StrictHash[x.symbolize_keys] } - end - end + VALID_BACKENDS = %i[legacy native].freeze - def db_name - client.config.database.to_sym - end + DELEGATED = %i[ + execute query query_value query_each count insert + from 
describe_table server_version tables + create_database drop_database db_name config + truncate_table! drop_table! optimize_table! + parse_value pg_table_connection populate_temp_table! with_temp_table + ].freeze - def parse_value(value, type:) - case type - when /String/ - value&.to_s - when /DateTime/ - Time.zone.parse(value) if value - else - value + DELEGATED.each do |method_name| + define_method(method_name) do |*args, **kwargs, &block| + backend.public_send(method_name, *args, **kwargs, &block) end end - def server_version - select_value("SELECT version()").to_f + def backend + @backend ||= backend_for(UmbrellioUtils.config.clickhouse_backend) end - def pg_table_connection(table) - host = ENV["PGHOST"] || DB.opts[:host].presence || "localhost" - port = DB.opts[:port] || 5432 - database = DB.opts[:database] - username = DB.opts[:user] - password = DB.opts[:password] - - Sequel.function(:postgresql, "#{host}:#{port}", database, table, username, password) - end - - def with_temp_table( - dataset, temp_table_name:, primary_key: [:id], primary_key_types: [:integer], **opts, & - ) - unless DB.table_exists?(temp_table_name) - UmbrellioUtils::Database.create_temp_table( - nil, primary_key:, primary_key_types:, temp_table_name:, & - ) - populate_temp_table!(temp_table_name, dataset) - end - UmbrellioUtils::Database.with_temp_table(nil, primary_key:, temp_table_name:, **opts, &) + # Testing hook — clears the memoized backend so specs can flip + # `clickhouse_backend` mid-run. Not part of the public API. + def reset_backend! 
+ @backend = nil end private - def client(host = nil) - cfg = ::ClickHouse.config - cfg.host = resolve(host) if host - ::ClickHouse::Connection.new(cfg) - end - memoize :client, ttl: 1.minute - - def resolve(host) - IPSocket.getaddress(host) - rescue => e - Exceptions.notify!(e, raise_errors: false) - config.host - end - - def logger - client.config.logger - end - - def log_errors(sql) - yield - rescue ::ClickHouse::Error => e - logger.error("ClickHouse error: #{e.inspect}\nSQL: #{sql}") - raise e - end - - def sql_for(dataset) - unless ch_dataset?(dataset) - raise "Non-ClickHouse dataset: #{dataset.inspect}. " \ - "You should use `CH.from` instead of `DB`" + def backend_for(name) + case name + when :legacy then Backends::Legacy.instance + when :native then Backends::Native.instance + else raise "Unknown clickhouse_backend: #{name.inspect} (expected one of #{VALID_BACKENDS})" end - - dataset.sql - end - - def ch_dataset?(dataset) - case dataset - when Sequel::Dataset - dataset.opts[:ch] && Array(dataset.opts[:from]).all? { |x| ch_dataset?(x) } - when Sequel::SQL::AliasedExpression - ch_dataset?(dataset.expression) - when Sequel::SQL::Identifier, Sequel::SQL::QualifiedIdentifier - true - else - raise "Unknown dataset type: #{dataset.inspect}" - end - end - - def full_table_name(table_name, db_name) - table_name = table_name.value if table_name.is_a?(Sequel::SQL::Identifier) - "#{db_name}.#{table_name}" - end - - def select_all(sql, host: nil, **opts) - response = client(host).get(body: sql, query: { default_format: "JSON", **opts }) - ::ClickHouse::Response::Factory.response(response, client(host).config) - end - - def select_value(...) 
- select_all(...).first.to_a.dig(0, -1) - end - - def populate_temp_table!(temp_table_name, dataset) - execute(<<~SQL.squish) - INSERT INTO TABLE FUNCTION #{DB.literal(pg_table_connection(temp_table_name))} - #{dataset.sql} - SQL end end end diff --git a/lib/umbrellio_utils/click_house/backends.rb b/lib/umbrellio_utils/click_house/backends.rb new file mode 100644 index 0000000..64e1c6e --- /dev/null +++ b/lib/umbrellio_utils/click_house/backends.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +require_relative "backends/base" + +module UmbrellioUtils + module ClickHouse + module Backends + autoload :Legacy, "umbrellio_utils/click_house/backends/legacy" + autoload :Native, "umbrellio_utils/click_house/backends/native" + end + end +end diff --git a/lib/umbrellio_utils/click_house/backends/base.rb b/lib/umbrellio_utils/click_house/backends/base.rb new file mode 100644 index 0000000..fd8a5b2 --- /dev/null +++ b/lib/umbrellio_utils/click_house/backends/base.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +require "singleton" + +module UmbrellioUtils + module ClickHouse + module Backends + # Abstract backend. Each concrete backend (Legacy for the `click_house` + # gem, Native for the `clickhouse-native` gem) implements the low-level + # ops (execute / query / insert / describe_table / server_version / + # tables / create_database / drop_database / config / logger) and a + # SERVER_ERROR constant used by `log_errors`. + class Base + include Singleton + + # Concrete backends implement the low-level ops (execute / query / + # insert / describe_table / server_version / tables / admin_execute + # / config / logger) and define SERVER_ERROR. + + def from(source, db_name: self.db_name) + ds = + case source + when Symbol + DB.from(db_name == self.db_name ? 
SQL[source] : SQL[db_name][source]) + when nil + DB.dataset + else + DB.from(source) + end + ds.clone(ch: true) + end + + def count(dataset) + query_value(dataset.select(SQL.ch_count)) + end + + def db_name + config.database.to_sym + end + + def create_database(name, if_not_exists: false, cluster: nil, engine: nil) + admin_execute( + format( + "CREATE DATABASE %<exists>s %<name>s %<cluster>s %<engine>s", + exists: if_not_exists ? "IF NOT EXISTS" : "", + name:, + cluster: cluster ? "ON CLUSTER #{cluster}" : "", + engine: engine ? "ENGINE = #{engine}" : "", + ), + ) + end + + def drop_database(name, if_exists: false, cluster: nil) + admin_execute( + format( + "DROP DATABASE %<exists>s %<name>s %<cluster>s", + exists: if_exists ? "IF EXISTS" : "", + name:, + cluster: cluster ? "ON CLUSTER #{cluster}" : "", + ), + ) + end + + def truncate_table!(table_name, db_name: self.db_name) + execute("TRUNCATE TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster SYNC") + end + + def drop_table!(table_name, db_name: self.db_name) + execute("DROP TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster SYNC") + end + + def optimize_table!(table_name, db_name: self.db_name) + Timeout.timeout(UmbrellioUtils.config.ch_optimize_timeout) do + execute("OPTIMIZE TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster FINAL") + end + end + + def parse_value(value, type:) + case type + when /Array/ then Array.wrap(value) + when /DateTime/ + case value + when String then value.present? ? Time.zone.parse(value) : nil + else value + end + when /String/ then value&.to_s + else value + end + end + + def pg_table_connection(table, schema: "public") + host = ENV["PGHOST"] || DB.opts[:host].presence || "localhost" + port = DB.opts[:port] || 5432 + # Etc.getlogin returns "root" under non-TTY shells (e.g. rake from + # a CI runner), which is almost never a real PG role. Prefer $USER. 
+ login = ENV["USER"].presence || Etc.getlogin + database = DB.opts[:database].presence || login + username = DB.opts[:user].presence || login + password = DB.opts[:password] + SQL.func(:postgresql, "#{host}:#{port}", database, table, username, password, schema) + end + + def populate_temp_table!(temp_table_name, dataset, schema: "public") + execute(<<~SQL.squish) + INSERT INTO TABLE FUNCTION #{DB.literal(pg_table_connection(temp_table_name, schema:))} + #{dataset.sql} + SQL + end + + def with_temp_table( + dataset, temp_table_name:, primary_key: [:id], primary_key_types: [:integer], **opts, & + ) + unless DB.table_exists?(temp_table_name) + UmbrellioUtils::Database.create_temp_table( + nil, primary_key:, primary_key_types:, temp_table_name:, & + ) + populate_temp_table!(temp_table_name, dataset) + end + UmbrellioUtils::Database.with_temp_table(nil, primary_key:, temp_table_name:, **opts, &) + end + + protected + + def log_errors(sql) + yield + rescue self.class::SERVER_ERROR => e + logger.error("ClickHouse error: #{e.inspect}\nSQL: #{sql}") + raise e + end + + def sql_for(dataset) + return dataset if dataset.is_a?(String) + unless ch_dataset?(dataset) + raise "Non-ClickHouse dataset: #{dataset.inspect}. " \ + "You should use `CH.from` instead of `DB`" + end + dataset.sql + end + + def ch_dataset?(dataset) + case dataset + when Sequel::Dataset + dataset.opts[:ch] && Array(dataset.opts[:from]).all? 
{ |x| ch_dataset?(x) } + when Sequel::SQL::AliasedExpression + ch_dataset?(dataset.expression) + when Sequel::SQL::Identifier, Sequel::SQL::QualifiedIdentifier + true + else + raise "Unknown dataset type: #{dataset.inspect}" + end + end + + def normalize_identifier(name) + name = name.value if name.is_a?(Sequel::SQL::Identifier) + name.to_s + end + + def full_table_name(table_name, db_name) + "#{db_name}.#{normalize_identifier(table_name)}" + end + end + end + end +end diff --git a/lib/umbrellio_utils/click_house/backends/legacy.rb b/lib/umbrellio_utils/click_house/backends/legacy.rb new file mode 100644 index 0000000..87e61e7 --- /dev/null +++ b/lib/umbrellio_utils/click_house/backends/legacy.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require "click_house" + +module UmbrellioUtils + module ClickHouse + module Backends + # Adapter for the umbrellio/click_house gem (HTTP driver). + class Legacy < Base + include Memery + + SERVER_ERROR = ::ClickHouse::Error + + def execute(sql, host: nil, **opts) + log_errors(sql) { client(host).execute(sql, params: opts) } + end + + def query(dataset, host: nil, **opts) + sql = sql_for(dataset) + log_errors(sql) do + select_all(sql, host:, **opts).map { |x| Misc::StrictHash[x.symbolize_keys] } + end + end + + def query_value(dataset, host: nil, **opts) + sql = sql_for(dataset) + log_errors(sql) { select_value(sql, host:, **opts) } + end + + def query_each(dataset, host: nil, **opts, &) + query(dataset, host:, **opts).each(&) + end + + def insert(table_name, db_name: self.db_name, rows: []) + client.insert(full_table_name(table_name, db_name), rows, format: "JSONEachRow") + end + + def describe_table(table_name, db_name: self.db_name) + sql = "DESCRIBE TABLE #{full_table_name(table_name, db_name)} FORMAT JSON" + log_errors(sql) { select_all(sql).map { |x| Misc::StrictHash[x.symbolize_keys] } } + end + + def server_version + select_value("SELECT version()").to_f + end + + def tables + client.tables + end + + # Legacy HTTP 
driver can issue DDL directly; no admin side-channel + # needed. Base#create_database / #drop_database call this. + def admin_execute(sql) + client.execute(sql) + end + + def config + client.config + end + + def logger + client.config.logger + end + + private + + def client(host = nil) + cfg = ::ClickHouse.config + cfg.host = resolve(host) if host + ::ClickHouse::Connection.new(cfg) + end + memoize :client, ttl: 1.minute + + def resolve(host) + IPSocket.getaddress(host) + rescue => e + Exceptions.notify!(e, raise_errors: false) + config.host + end + + def select_all(sql, host: nil, **opts) + response = client(host).get(body: sql, query: { default_format: "JSON", **opts }) + ::ClickHouse::Response::Factory.response(response, client(host).config) + end + + def select_value(...) + select_all(...).first.to_a.dig(0, -1) + end + end + end + end +end diff --git a/lib/umbrellio_utils/click_house/backends/native.rb b/lib/umbrellio_utils/click_house/backends/native.rb new file mode 100644 index 0000000..3322659 --- /dev/null +++ b/lib/umbrellio_utils/click_house/backends/native.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +require "clickhouse-native" +require_relative "../config" + +module UmbrellioUtils + module ClickHouse + module Backends + # Adapter for the clickhouse-native gem (TCP driver). + # + # Intentional differences from the HTTP-era module: + # - Values returned by query / query_value are real Ruby types + # (Time, Integer, etc.), not JSON-stringified. + # - The `host:` kwarg on execute / query / query_value is accepted + # for source compatibility but ignored — hostname is bound at + # Pool construction, not per query. + class Native < Base + SERVER_ERROR = ::ClickhouseNative::ServerError + + # Server-side error codes that mean "object doesn't exist". Used by + # describe_table callers that want to tolerate eager-load against a + # database that hasn't been created yet (e.g. rake ch:create). 
+ UNKNOWN_TABLE = 60 + UNKNOWN_DATABASE = 81 + + def execute(sql, host: nil, **_opts) # rubocop:disable Lint/UnusedMethodArgument + sql_string = sql.is_a?(String) ? sql : sql.sql + log_errors(sql_string) { pool.execute(sql_string) } + end + + def query(dataset, host: nil, **_opts) # rubocop:disable Lint/UnusedMethodArgument + sql = sql_for(dataset) + log_errors(sql) { pool.query(sql) } + end + + def query_value(dataset, host: nil, **_opts) # rubocop:disable Lint/UnusedMethodArgument + sql = sql_for(dataset) + log_errors(sql) { pool.query_value(sql) } + end + + def query_each(dataset, host: nil, **_opts, &) # rubocop:disable Lint/UnusedMethodArgument + sql = sql_for(dataset) + log_errors(sql) { pool.query_each(sql, &) } + end + + def insert(table_name, db_name: self.db_name, rows: []) + return if rows.empty? + pool.insert(normalize_identifier(table_name), rows, db_name: db_name.to_s) + end + + def describe_table(table_name, db_name: self.db_name) + pool.describe_table(normalize_identifier(table_name), db_name: db_name.to_s) + end + + def server_version + pool.with(&:server_version).to_f + end + + def tables + pool.query("SHOW TABLES").pluck(:name) + end + + def config + ::ClickHouse.config + end + + # Read through pool so test mocks of `pool` also redirect `db_name`. + def db_name + pool.database.to_sym + end + + def logger + @logger ||= (defined?(Rails) && Rails.logger) || Logger.new($stdout) + end + + def pool + @pool ||= ::ClickhouseNative::Pool.new( + **client_options(database: (config[:database] || "default").to_s), + pool_size: Integer(config[:pool_size] || 5), + pool_timeout: Integer(config[:pool_timeout] || 10), + settings: UmbrellioUtils.config.clickhouse_native_settings || {}, + ) + end + + # DDL that creates/drops the configured database can't run through + # the main pool (which is bound to that database). Open a one-shot + # client connected to the always-present "default" db instead. 
+ def admin_execute(sql) + admin = ::ClickhouseNative::Client.new(**client_options(database: "default")) + admin.execute(sql) + ensure + admin&.close + end + + private + + def client_options(database:) + { + host: config[:host] || "localhost", + port: Integer(config[:port] || 9000), + database:, + user: (config[:username] || "default").to_s, + password: (config[:password] || "").to_s, + logger:, + } + end + end + end + end +end diff --git a/lib/umbrellio_utils/click_house/config.rb b/lib/umbrellio_utils/click_house/config.rb new file mode 100644 index 0000000..af96042 --- /dev/null +++ b/lib/umbrellio_utils/click_house/config.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +# Provides `::ClickHouse.config` when the legacy `click_house` gem is +# not loaded. The legacy gem defines `::ClickHouse::Connection` and its +# own `::ClickHouse.config`; we only step in when it's absent (typical +# for consumers that have migrated to the `clickhouse-native` gem). +unless defined?(ClickHouse::Connection) + module ClickHouse + def self.config + @config ||= Rails.application.config_for(:clickhouse) + end + end +end diff --git a/lib/umbrellio_utils/tasks/clickhouse_connect.rake b/lib/umbrellio_utils/tasks/clickhouse_connect.rake index dbdc5ff..c3f013f 100644 --- a/lib/umbrellio_utils/tasks/clickhouse_connect.rake +++ b/lib/umbrellio_utils/tasks/clickhouse_connect.rake @@ -3,12 +3,13 @@ namespace :ch do desc "run clickhouse client" task connect: :environment do + cfg = UmbrellioUtils::ClickHouse.config params = { - host: ENV.fetch("CLICKHOUSE_HOST", UmbrellioUtils::ClickHouse.config.host), - user: ENV.fetch("CLICKHOUSE_USER", UmbrellioUtils::ClickHouse.config.username), - password: ENV.fetch("CLICKHOUSE_PASSWORD", UmbrellioUtils::ClickHouse.config.password), - database: ENV.fetch("CLICKHOUSE_DATABASE", UmbrellioUtils::ClickHouse.config.database), - **UmbrellioUtils::ClickHouse.config.global_params, + host: ENV.fetch("CLICKHOUSE_HOST", cfg.host), + user: 
ENV.fetch("CLICKHOUSE_USER", cfg.username), + password: ENV.fetch("CLICKHOUSE_PASSWORD", cfg.password), + database: ENV.fetch("CLICKHOUSE_DATABASE", cfg.database), + **(cfg.try(:global_params) || {}), }.compact_blank cmd = Shellwords.join(["clickhouse", "client", *params.map { |k, v| "--#{k}=#{v}" }]) diff --git a/lib/umbrellio_utils/version.rb b/lib/umbrellio_utils/version.rb index 8b9a8e7..040173a 100644 --- a/lib/umbrellio_utils/version.rb +++ b/lib/umbrellio_utils/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module UmbrellioUtils - VERSION = "1.12.1" + VERSION = "1.13.0" end diff --git a/spec/umbrellio_utils/click_house/backend_dispatch_spec.rb b/spec/umbrellio_utils/click_house/backend_dispatch_spec.rb new file mode 100644 index 0000000..f287ad8 --- /dev/null +++ b/spec/umbrellio_utils/click_house/backend_dispatch_spec.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +describe UmbrellioUtils::ClickHouse, "backend dispatch" do + let(:facade) { described_class } + + before { facade.reset_backend! } + after do + facade.reset_backend! + UmbrellioUtils.configure { |c| c.clickhouse_backend = :legacy } + end + + context "when :legacy" do + before { UmbrellioUtils.configure { |c| c.clickhouse_backend = :legacy } } + + it "returns the Legacy adapter" do + expect(facade.backend).to be_a(UmbrellioUtils::ClickHouse::Backends::Legacy) + end + + it "delegates execute to the adapter" do + expect(facade.backend).to receive(:execute).with("SELECT 1", host: nil) + facade.execute("SELECT 1", host: nil) + end + end + + context "when :native" do + before do + # clickhouse-native gem requires Ruby >= 3.3; gated in Gemfile. 
+ skip "clickhouse-native not installed" unless Gem.loaded_specs.key?("clickhouse-native") + UmbrellioUtils.configure { |c| c.clickhouse_backend = :native } + end + + it "returns the Native adapter" do + expect(facade.backend).to be_a(UmbrellioUtils::ClickHouse::Backends::Native) + end + + it "delegates query to the adapter" do + dataset = double(sql: "SELECT 1") + expect(facade.backend).to receive(:query).with(dataset) + facade.query(dataset) + end + end + + context "when unknown" do + before { UmbrellioUtils.configure { |c| c.clickhouse_backend = :bogus } } + + it "raises" do + expect { facade.backend }.to raise_error(/Unknown clickhouse_backend: :bogus/) + end + end +end diff --git a/spec/umbrellio_utils_spec.rb b/spec/umbrellio_utils_spec.rb index cb6beac..90079fc 100644 --- a/spec/umbrellio_utils_spec.rb +++ b/spec/umbrellio_utils_spec.rb @@ -10,6 +10,8 @@ ch_optimize_timeout: 5.minutes, store_table_name: :store, http_client_name: :application_httpclient, + clickhouse_backend: :legacy, + clickhouse_native_settings: {}, ) end From fb4f38c3469967239c95320eed18e60d5afe6597 Mon Sep 17 00:00:00 2001 From: Yuri Smirnov Date: Tue, 28 Apr 2026 12:11:30 +0300 Subject: [PATCH 2/7] Add clickhouse_native_logger config option Allows consumers to inject a custom logger (e.g. a SemanticLogger named channel) into the native ClickHouse pool, instead of the default Rails.logger fallback. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/umbrellio_utils.rb | 1 + lib/umbrellio_utils/click_house/backends/native.rb | 4 +++- spec/umbrellio_utils_spec.rb | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/umbrellio_utils.rb b/lib/umbrellio_utils.rb index 8f14ae9..26b3eb6 100644 --- a/lib/umbrellio_utils.rb +++ b/lib/umbrellio_utils.rb @@ -22,6 +22,7 @@ def config :ch_optimize_timeout, :clickhouse_backend, :clickhouse_native_settings, + :clickhouse_native_logger, keyword_init: true, ) .new(**default_settings) diff --git a/lib/umbrellio_utils/click_house/backends/native.rb b/lib/umbrellio_utils/click_house/backends/native.rb index 3322659..65b369d 100644 --- a/lib/umbrellio_utils/click_house/backends/native.rb +++ b/lib/umbrellio_utils/click_house/backends/native.rb @@ -70,7 +70,9 @@ def db_name end def logger - @logger ||= (defined?(Rails) && Rails.logger) || Logger.new($stdout) + @logger ||= UmbrellioUtils.config.clickhouse_native_logger || + (defined?(Rails) && Rails.logger) || + Logger.new($stdout) end def pool diff --git a/spec/umbrellio_utils_spec.rb b/spec/umbrellio_utils_spec.rb index 90079fc..00ea762 100644 --- a/spec/umbrellio_utils_spec.rb +++ b/spec/umbrellio_utils_spec.rb @@ -12,6 +12,7 @@ http_client_name: :application_httpclient, clickhouse_backend: :legacy, clickhouse_native_settings: {}, + clickhouse_native_logger: nil, ) end From 591a95b338ab2247b875ff289e0cfc5b1e09c8c7 Mon Sep 17 00:00:00 2001 From: Yuri Smirnov Date: Tue, 28 Apr 2026 22:00:55 +0300 Subject: [PATCH 3/7] Add on_cluster helper + clickhouse_cluster config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Promotes the test-env-aware ON CLUSTER suppression that consumers (profile-service, unetsafe) were duplicating in their local CH modules. Adds: - UmbrellioUtils.config.clickhouse_cluster (default "click_cluster", set to nil/blank to skip the clause everywhere). 
- Backends::Base#on_cluster(sync:) — returns the ON CLUSTER clause, or "" when the cluster name is blank or `Rails.env.test?`. The cluster *name* is still used by callers like Distributed engine declarations regardless of this clause (each ON CLUSTER op blocks for hundreds of ms on a single-node CH waiting for replicas that don't exist). - truncate_table! / drop_table! / optimize_table! and Migrations#create_distributed_table! now use the helper instead of hardcoding `ON CLUSTER click_cluster`. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/umbrellio_utils.rb | 2 ++ lib/umbrellio_utils/click_house.rb | 2 +- .../click_house/backends/base.rb | 20 ++++++++++++++++--- lib/umbrellio_utils/migrations.rb | 9 ++++++--- spec/umbrellio_utils_spec.rb | 1 + 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/lib/umbrellio_utils.rb b/lib/umbrellio_utils.rb index 26b3eb6..286dae0 100644 --- a/lib/umbrellio_utils.rb +++ b/lib/umbrellio_utils.rb @@ -23,6 +23,7 @@ def config :clickhouse_backend, :clickhouse_native_settings, :clickhouse_native_logger, + :clickhouse_cluster, keyword_init: true, ) .new(**default_settings) @@ -53,6 +54,7 @@ def default_settings ch_optimize_timeout: 5.minutes, clickhouse_backend: :legacy, clickhouse_native_settings: {}, + clickhouse_cluster: "click_cluster", } end diff --git a/lib/umbrellio_utils/click_house.rb b/lib/umbrellio_utils/click_house.rb index 57c7666..133722c 100644 --- a/lib/umbrellio_utils/click_house.rb +++ b/lib/umbrellio_utils/click_house.rb @@ -17,7 +17,7 @@ module ClickHouse execute query query_value query_each count insert from describe_table server_version tables create_database drop_database db_name config - truncate_table! drop_table! optimize_table! + truncate_table! drop_table! optimize_table! on_cluster parse_value pg_table_connection populate_temp_table! 
with_temp_table ].freeze diff --git a/lib/umbrellio_utils/click_house/backends/base.rb b/lib/umbrellio_utils/click_house/backends/base.rb index fd8a5b2..9aeb44a 100644 --- a/lib/umbrellio_utils/click_house/backends/base.rb +++ b/lib/umbrellio_utils/click_house/backends/base.rb @@ -61,17 +61,31 @@ def drop_database(name, if_exists: false, cluster: nil) ) end + # Returns the `ON CLUSTER [SYNC]` clause for DDL, or "" if + # `UmbrellioUtils.config.clickhouse_cluster` is blank or we're in + # a Rails test env. Test-env suppression saves hundreds of ms per + # DDL on a single-node CH (each ON CLUSTER op blocks waiting for + # replicas that don't exist). The cluster *name* is still used + # by callers like Distributed engine declarations, regardless of + # this clause. + def on_cluster(sync: false) + name = UmbrellioUtils.config.clickhouse_cluster + return "" if name.blank? + return "" if defined?(Rails) && Rails.env.test? + sync ? "ON CLUSTER #{name} SYNC" : "ON CLUSTER #{name}" + end + def truncate_table!(table_name, db_name: self.db_name) - execute("TRUNCATE TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster SYNC") + execute("TRUNCATE TABLE #{db_name}.#{table_name} #{on_cluster(sync: true)}") end def drop_table!(table_name, db_name: self.db_name) - execute("DROP TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster SYNC") + execute("DROP TABLE #{db_name}.#{table_name} #{on_cluster(sync: true)}") end def optimize_table!(table_name, db_name: self.db_name) Timeout.timeout(UmbrellioUtils.config.ch_optimize_timeout) do - execute("OPTIMIZE TABLE #{db_name}.#{table_name} ON CLUSTER click_cluster FINAL") + execute("OPTIMIZE TABLE #{db_name}.#{table_name} #{on_cluster} FINAL") end end diff --git a/lib/umbrellio_utils/migrations.rb b/lib/umbrellio_utils/migrations.rb index 2f782fb..8890462 100644 --- a/lib/umbrellio_utils/migrations.rb +++ b/lib/umbrellio_utils/migrations.rb @@ -200,16 +200,19 @@ def check_associations(model, method, reverse_method) end def 
create_distributed_table!(table_name, sharding_key, db_name: UmbrellioUtils::ClickHouse.db_name) + cluster = UmbrellioUtils.config.clickhouse_cluster + on_cluster = UmbrellioUtils::ClickHouse.on_cluster + UmbrellioUtils::ClickHouse.execute(<<~SQL.squish) DROP TABLE IF EXISTS #{db_name}.#{table_name}_distributed - ON CLUSTER click_cluster + #{on_cluster} SQL UmbrellioUtils::ClickHouse.execute(<<~SQL.squish) CREATE TABLE #{db_name}.#{table_name}_distributed - ON CLUSTER click_cluster + #{on_cluster} AS #{db_name}.#{table_name} - ENGINE = Distributed(click_cluster, #{db_name}, #{table_name}, #{sharding_key}) + ENGINE = Distributed(#{cluster}, #{db_name}, #{table_name}, #{sharding_key}) SQL end diff --git a/spec/umbrellio_utils_spec.rb b/spec/umbrellio_utils_spec.rb index 00ea762..d217043 100644 --- a/spec/umbrellio_utils_spec.rb +++ b/spec/umbrellio_utils_spec.rb @@ -13,6 +13,7 @@ clickhouse_backend: :legacy, clickhouse_native_settings: {}, clickhouse_native_logger: nil, + clickhouse_cluster: "click_cluster", ) end From 62dc5e4570a4aab82e543bd9faf71ff252f1ca47 Mon Sep 17 00:00:00 2001 From: Yuri Smirnov Date: Wed, 29 Apr 2026 12:23:49 +0300 Subject: [PATCH 4/7] wip --- .github/workflows/test.yml | 2 +- .rubocop.yml | 2 +- Gemfile.lock | 362 +++++++++--------- lib/umbrellio_utils/cards.rb | 7 +- .../click_house/backends/base.rb | 4 +- .../click_house/backends/legacy.rb | 4 +- lib/umbrellio_utils/control.rb | 4 +- lib/umbrellio_utils/database.rb | 7 +- lib/umbrellio_utils/http_client.rb | 12 +- lib/umbrellio_utils/sql.rb | 16 +- spec/support/database.rb | 3 +- spec/umbrellio_utils/clickhouse_spec.rb | 14 +- umbrellio_utils.gemspec | 2 +- 13 files changed, 215 insertions(+), 224 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a307874..67bd9ee 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - ruby: ["3.1", "3.2", "3.3", "3.4"] + ruby: ["3.3", "3.4", 
"4.0"] env: PGHOST: pg diff --git a/.rubocop.yml b/.rubocop.yml index 783ce52..3c60a3c 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -3,7 +3,7 @@ inherit_gem: AllCops: DisplayCopNames: true - TargetRubyVersion: 3.1 + TargetRubyVersion: 3.3 SuggestExtensions: false Naming/MethodParameterName: diff --git a/Gemfile.lock b/Gemfile.lock index 5941539..373044a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,11 +1,12 @@ GIT remote: https://github.com/umbrellio/click_house.git - revision: 1bbf8cb909a248b401d0ba9a9f6f1de2e2c068db + revision: a1ff1df17c8cdd74e0f1b010c5e7a686d6521536 branch: master specs: - click_house (2.1.2) + click_house (2.2.1) activesupport - faraday (>= 1.7, < 3) + faraday (>= 2, < 3) + faraday-http PATH remote: . @@ -16,186 +17,172 @@ PATH GEM remote: https://rubygems.org/ specs: - actioncable (7.2.2.2) - actionpack (= 7.2.2.2) - activesupport (= 7.2.2.2) + action_text-trix (2.1.18) + railties + actioncable (8.1.3) + actionpack (= 8.1.3) + activesupport (= 8.1.3) nio4r (~> 2.0) websocket-driver (>= 0.6.1) zeitwerk (~> 2.6) - actionmailbox (7.2.2.2) - actionpack (= 7.2.2.2) - activejob (= 7.2.2.2) - activerecord (= 7.2.2.2) - activestorage (= 7.2.2.2) - activesupport (= 7.2.2.2) + actionmailbox (8.1.3) + actionpack (= 8.1.3) + activejob (= 8.1.3) + activerecord (= 8.1.3) + activestorage (= 8.1.3) + activesupport (= 8.1.3) mail (>= 2.8.0) - actionmailer (7.2.2.2) - actionpack (= 7.2.2.2) - actionview (= 7.2.2.2) - activejob (= 7.2.2.2) - activesupport (= 7.2.2.2) + actionmailer (8.1.3) + actionpack (= 8.1.3) + actionview (= 8.1.3) + activejob (= 8.1.3) + activesupport (= 8.1.3) mail (>= 2.8.0) rails-dom-testing (~> 2.2) - actionpack (7.2.2.2) - actionview (= 7.2.2.2) - activesupport (= 7.2.2.2) + actionpack (8.1.3) + actionview (= 8.1.3) + activesupport (= 8.1.3) nokogiri (>= 1.8.5) - racc - rack (>= 2.2.4, < 3.2) + rack (>= 2.2.4) rack-session (>= 1.0.1) rack-test (>= 0.6.3) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) useragent (~> 
0.16) - actiontext (7.2.2.2) - actionpack (= 7.2.2.2) - activerecord (= 7.2.2.2) - activestorage (= 7.2.2.2) - activesupport (= 7.2.2.2) + actiontext (8.1.3) + action_text-trix (~> 2.1.15) + actionpack (= 8.1.3) + activerecord (= 8.1.3) + activestorage (= 8.1.3) + activesupport (= 8.1.3) globalid (>= 0.6.0) nokogiri (>= 1.8.5) - actionview (7.2.2.2) - activesupport (= 7.2.2.2) + actionview (8.1.3) + activesupport (= 8.1.3) builder (~> 3.1) erubi (~> 1.11) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) - activejob (7.2.2.2) - activesupport (= 7.2.2.2) + activejob (8.1.3) + activesupport (= 8.1.3) globalid (>= 0.3.6) - activemodel (7.2.2.2) - activesupport (= 7.2.2.2) - activerecord (7.2.2.2) - activemodel (= 7.2.2.2) - activesupport (= 7.2.2.2) + activemodel (8.1.3) + activesupport (= 8.1.3) + activerecord (8.1.3) + activemodel (= 8.1.3) + activesupport (= 8.1.3) timeout (>= 0.4.0) - activestorage (7.2.2.2) - actionpack (= 7.2.2.2) - activejob (= 7.2.2.2) - activerecord (= 7.2.2.2) - activesupport (= 7.2.2.2) + activestorage (8.1.3) + actionpack (= 8.1.3) + activejob (= 8.1.3) + activerecord (= 8.1.3) + activesupport (= 8.1.3) marcel (~> 1.0) - activesupport (7.2.2.2) + activesupport (8.1.3) base64 - benchmark (>= 0.3) bigdecimal concurrent-ruby (~> 1.0, >= 1.3.1) connection_pool (>= 2.2.5) drb i18n (>= 1.6, < 2) + json logger (>= 1.4.2) minitest (>= 5.1) securerandom (>= 0.3) tzinfo (~> 2.0, >= 2.0.5) - addressable (2.8.7) - public_suffix (>= 2.0.2, < 7.0) - amazing_print (1.8.1) - amq-protocol (2.3.4) + uri (>= 0.13.1) + amazing_print (2.0.0) + amq-protocol (2.8.0) ast (2.4.3) base64 (0.3.0) - benchmark (0.4.1) - bigdecimal (3.2.3) + bigdecimal (4.1.2) builder (3.3.0) bunny (2.24.0) amq-protocol (~> 2.3) sorted_set (~> 1, >= 1.0.2) - cgi (0.5.0) - ci-helper (0.7.0) + ci-helper (0.10.0) colorize (~> 1.1) dry-inflector (~> 1.0) umbrellio-sequel-plugins (~> 0.14) - clickhouse-native (0.5.0) - connection_pool (>= 2.4) - clickhouse-native 
(0.5.0-aarch64-linux-gnu) - connection_pool (>= 2.4) - clickhouse-native (0.5.0-arm64-darwin) - connection_pool (>= 2.4) - clickhouse-native (0.5.0-x86_64-darwin) - connection_pool (>= 2.4) - clickhouse-native (0.5.0-x86_64-linux-gnu) - connection_pool (>= 2.4) + clickhouse-native (0.7.0) + connection_pool (>= 2.5.4) + clickhouse-native (0.7.0-aarch64-linux-gnu) + connection_pool (>= 2.5.4) + clickhouse-native (0.7.0-arm64-darwin) + connection_pool (>= 2.5.4) + clickhouse-native (0.7.0-x86_64-darwin) + connection_pool (>= 2.5.4) + clickhouse-native (0.7.0-x86_64-linux-gnu) + connection_pool (>= 2.5.4) coderay (1.1.3) colorize (1.1.0) - concurrent-ruby (1.3.5) - connection_pool (2.5.4) + concurrent-ruby (1.3.6) + connection_pool (3.0.2) crass (1.0.6) csv (3.3.5) - date (3.4.1) + date (3.5.1) diff-lcs (1.6.2) docile (1.4.1) domain_name (0.6.20240107) drb (2.2.3) - dry-inflector (1.2.0) - erb (4.0.4) - cgi (>= 0.3.3) + dry-inflector (1.3.1) + erb (6.0.4) erubi (1.13.1) - faraday (2.14.0) + faraday (2.14.1) faraday-net_http (>= 2.0, < 3.5) json logger + faraday-http (2.1.0) + faraday (~> 2.5) + http (>= 4.0, < 7) faraday-net_http (3.4.2) net-http (~> 0.5) - ffi (1.17.2) - ffi (1.17.2-aarch64-linux-gnu) - ffi (1.17.2-aarch64-linux-musl) - ffi (1.17.2-arm-linux-gnu) - ffi (1.17.2-arm-linux-musl) - ffi (1.17.2-arm64-darwin) - ffi (1.17.2-x86_64-darwin) - ffi (1.17.2-x86_64-linux-gnu) - ffi (1.17.2-x86_64-linux-musl) - ffi-compiler (1.3.2) - ffi (>= 1.15.5) - rake globalid (1.3.0) activesupport (>= 6.1) - http (5.3.1) - addressable (~> 2.8) + http (6.0.3) http-cookie (~> 1.0) - http-form_data (~> 2.2) - llhttp-ffi (~> 0.5.0) - http-cookie (1.1.0) + llhttp (~> 0.6.1) + http-cookie (1.1.6) domain_name (~> 0.5) - http-form_data (2.3.0) - i18n (1.14.7) + i18n (1.14.8) concurrent-ruby (~> 1.0) - io-console (0.8.1) - irb (1.15.2) + io-console (0.8.2) + irb (1.18.0) pp (>= 0.6.0) + prism (>= 1.3.0) rdoc (>= 4.0.0) reline (>= 0.4.2) - json (2.15.0) - kicks (3.2.0) - bunny (~> 
2.19) + json (2.19.4) + kicks (3.3.0) + bunny (~> 2.24) concurrent-ruby (~> 1.0) rake (>= 12.3, < 14.0) serverengine (~> 2.1) thor language_server-protocol (3.17.0.5) lint_roller (1.1.0) - llhttp-ffi (0.5.1) - ffi-compiler (~> 1.0) - rake (~> 13.0) + llhttp (0.6.1) logger (1.7.0) - loofah (2.24.1) + loofah (2.25.1) crass (~> 1.0.2) nokogiri (>= 1.12.0) - mail (2.8.1) + mail (2.9.0) + logger mini_mime (>= 0.1.1) net-imap net-pop net-smtp marcel (1.1.0) - memery (1.7.0) + memery (1.8.0) method_source (1.1.0) mini_mime (1.1.5) - mini_portile2 (2.8.9) minitest (6.0.5) drb (~> 2.0) prism (~> 1.5) - net-http (0.8.0) + msgpack (1.8.0) + net-http (0.9.1) uri (>= 0.11.1) - net-imap (0.5.10) + net-imap (0.6.4) date net-protocol net-pop (0.1.2) @@ -204,105 +191,106 @@ GEM timeout net-smtp (0.5.1) net-protocol - nio4r (2.7.4) - nokogiri (1.18.10) - mini_portile2 (~> 2.8.2) + nio4r (2.7.5) + nokogiri (1.19.3-aarch64-linux-gnu) racc (~> 1.4) - nokogiri (1.18.10-aarch64-linux-gnu) + nokogiri (1.19.3-aarch64-linux-musl) racc (~> 1.4) - nokogiri (1.18.10-aarch64-linux-musl) + nokogiri (1.19.3-arm-linux-gnu) racc (~> 1.4) - nokogiri (1.18.10-arm-linux-gnu) + nokogiri (1.19.3-arm-linux-musl) racc (~> 1.4) - nokogiri (1.18.10-arm-linux-musl) + nokogiri (1.19.3-arm64-darwin) racc (~> 1.4) - nokogiri (1.18.10-arm64-darwin) + nokogiri (1.19.3-x86_64-darwin) racc (~> 1.4) - nokogiri (1.18.10-x86_64-darwin) + nokogiri (1.19.3-x86_64-linux-gnu) racc (~> 1.4) - nokogiri (1.18.10-x86_64-linux-gnu) - racc (~> 1.4) - nokogiri (1.18.10-x86_64-linux-musl) + nokogiri (1.19.3-x86_64-linux-musl) racc (~> 1.4) nori (2.7.1) bigdecimal - parallel (1.27.0) - parser (3.3.9.0) + parallel (1.28.0) + parser (3.3.11.1) ast (~> 2.4.1) racc - pg (1.6.2) - pg (1.6.2-aarch64-linux) - pg (1.6.2-aarch64-linux-musl) - pg (1.6.2-arm64-darwin) - pg (1.6.2-x86_64-darwin) - pg (1.6.2-x86_64-linux) - pg (1.6.2-x86_64-linux-musl) - pp (0.6.2) + pg (1.6.3) + pg (1.6.3-aarch64-linux) + pg (1.6.3-aarch64-linux-musl) + pg 
(1.6.3-arm64-darwin) + pg (1.6.3-x86_64-darwin) + pg (1.6.3-x86_64-linux) + pg (1.6.3-x86_64-linux-musl) + pp (0.6.3) prettyprint prettyprint (0.2.0) - prism (1.5.1) - pry (0.15.2) + prism (1.9.0) + pry (0.16.0) coderay (~> 1.1) method_source (~> 1.0) - psych (5.2.6) + reline (>= 0.6.0) + psych (5.3.1) date stringio - public_suffix (6.0.2) - rabbit_messaging (1.7.0) + rabbit_messaging (1.9.0) bunny (~> 2.0) kicks + msgpack + zlib racc (1.8.1) - rack (3.1.16) - rack-session (2.1.1) + rack (3.2.6) + rack-session (2.1.2) base64 (>= 0.1.0) rack (>= 3.0.0) rack-test (2.2.0) rack (>= 1.3) - rackup (2.2.1) + rackup (2.3.1) rack (>= 3) - rails (7.2.2.2) - actioncable (= 7.2.2.2) - actionmailbox (= 7.2.2.2) - actionmailer (= 7.2.2.2) - actionpack (= 7.2.2.2) - actiontext (= 7.2.2.2) - actionview (= 7.2.2.2) - activejob (= 7.2.2.2) - activemodel (= 7.2.2.2) - activerecord (= 7.2.2.2) - activestorage (= 7.2.2.2) - activesupport (= 7.2.2.2) + rails (8.1.3) + actioncable (= 8.1.3) + actionmailbox (= 8.1.3) + actionmailer (= 8.1.3) + actionpack (= 8.1.3) + actiontext (= 8.1.3) + actionview (= 8.1.3) + activejob (= 8.1.3) + activemodel (= 8.1.3) + activerecord (= 8.1.3) + activestorage (= 8.1.3) + activesupport (= 8.1.3) bundler (>= 1.15.0) - railties (= 7.2.2.2) + railties (= 8.1.3) rails-dom-testing (2.3.0) activesupport (>= 5.0.0) minitest nokogiri (>= 1.6) - rails-html-sanitizer (1.6.2) - loofah (~> 2.21) + rails-html-sanitizer (1.7.0) + loofah (~> 2.25) nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0) - railties (7.2.2.2) - actionpack (= 7.2.2.2) - activesupport (= 7.2.2.2) + railties (8.1.3) + actionpack (= 8.1.3) + activesupport (= 8.1.3) irb (~> 1.13) rackup (>= 1.0.0) rake (>= 12.2) thor (~> 1.0, >= 1.2.2) + tsort (>= 0.2) zeitwerk (~> 2.6) rainbow (3.1.1) - rake (13.3.0) + rake (13.4.2) rbtree (0.4.6) - rdoc (6.14.2) + rdoc (7.2.0) erb psych (>= 4.0.0) - regexp_parser (2.11.3) - reline (0.6.2) + 
tsort + regexp_parser (2.12.0) + reline (0.6.3) io-console (~> 0.5) - rspec (3.13.1) + rspec (3.13.2) rspec-core (~> 3.13.0) rspec-expectations (~> 3.13.0) rspec-mocks (~> 3.13.0) - rspec-core (3.13.5) + rspec-core (3.13.6) rspec-support (~> 3.13.0) rspec-expectations (3.13.5) diff-lcs (>= 1.2.0, < 2.0) @@ -310,11 +298,11 @@ GEM rspec-json_matcher (0.2.0) amazing_print json - rspec-mocks (3.13.5) + rspec-mocks (3.13.8) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.13.0) - rspec-support (3.13.6) - rubocop (1.80.2) + rspec-support (3.13.7) + rubocop (1.84.2) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -322,28 +310,28 @@ GEM parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 2.9.3, < 3.0) - rubocop-ast (>= 1.46.0, < 2.0) + rubocop-ast (>= 1.49.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.47.1) + rubocop-ast (1.49.1) parser (>= 3.3.7.2) - prism (~> 1.4) - rubocop-config-umbrellio (1.80.107) - rubocop (~> 1.80.0) - rubocop-factory_bot (~> 2.27.0) + prism (~> 1.7) + rubocop-config-umbrellio (1.84.118) + rubocop (~> 1.84.0) + rubocop-factory_bot (~> 2.28.0) rubocop-performance (~> 1.26.0) - rubocop-rails (~> 2.33.0) + rubocop-rails (~> 2.34.0) rubocop-rake (~> 0.7.0) - rubocop-rspec (~> 3.7.0) + rubocop-rspec (~> 3.9.0) rubocop-sequel (~> 0.4.0) - rubocop-factory_bot (2.27.1) + rubocop-factory_bot (2.28.0) lint_roller (~> 1.1) rubocop (~> 1.72, >= 1.72.1) - rubocop-performance (1.26.0) + rubocop-performance (1.26.1) lint_roller (~> 1.1) rubocop (>= 1.75.0, < 2.0) - rubocop-ast (>= 1.44.0, < 2.0) - rubocop-rails (2.33.4) + rubocop-ast (>= 1.47.1, < 2.0) + rubocop-rails (2.34.3) activesupport (>= 4.2.0) lint_roller (~> 1.1) rack (>= 1.1) @@ -352,18 +340,18 @@ GEM rubocop-rake (0.7.1) lint_roller (~> 1.1) rubocop (>= 1.72.1) - rubocop-rspec (3.7.0) + rubocop-rspec (3.9.0) lint_roller (~> 1.1) - rubocop (~> 1.72, >= 1.72.1) + rubocop (~> 1.81) rubocop-sequel (0.4.1) lint_roller 
(~> 1.1) rubocop (>= 1.72.1, < 2) ruby-progressbar (1.13.0) securerandom (0.4.1) self_data (1.3.0) - semantic_logger (4.17.0) + semantic_logger (4.18.0) concurrent-ruby (~> 1.0) - sequel (5.96.0) + sequel (5.103.0) bigdecimal sequel-batches (2.0.3) sequel @@ -371,7 +359,6 @@ GEM base64 (~> 0.1) logger (~> 1.4) sigdump (~> 0.2.2) - set (1.1.2) sigdump (0.2.5) simplecov (0.22.0) docile (~> 1.1) @@ -380,21 +367,21 @@ GEM simplecov-html (0.13.2) simplecov-lcov (0.9.0) simplecov_json_formatter (0.1.4) - sorted_set (1.0.3) + sorted_set (1.1.0) rbtree - set (~> 1.0) - stringio (3.1.7) - table_sync (6.8.0) + stringio (3.2.0) + table_sync (6.11.0) memery - rabbit_messaging (>= 1.7.0) + rabbit_messaging (>= 1.8.0) rails self_data - thor (1.4.0) - timecop (0.9.10) - timeout (0.4.3) + thor (1.5.0) + timecop (0.9.11) + timeout (0.6.1) + tsort (0.2.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - umbrellio-sequel-plugins (0.17.0) + umbrellio-sequel-plugins (0.17.2) sequel unicode-display_width (3.2.0) unicode-emoji (~> 4.1) @@ -405,8 +392,9 @@ GEM base64 websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) - yard (0.9.37) - zeitwerk (2.6.18) + yard (0.9.43) + zeitwerk (2.7.5) + zlib (3.2.3) PLATFORMS aarch64-linux @@ -448,4 +436,4 @@ DEPENDENCIES yard BUNDLED WITH - 2.6.9 + 2.7.2 diff --git a/lib/umbrellio_utils/cards.rb b/lib/umbrellio_utils/cards.rb index 846675d..22f6921 100644 --- a/lib/umbrellio_utils/cards.rb +++ b/lib/umbrellio_utils/cards.rb @@ -4,10 +4,11 @@ module UmbrellioUtils module Cards extend self - InvalidExpiryDateString = Class.new(StandardError) + class InvalidExpiryDateString < StandardError + end - def parse_expiry_date!(string, **options) - result = parse_expiry_date(string, **options) + def parse_expiry_date!(string, **) + result = parse_expiry_date(string, **) unless result raise InvalidExpiryDateString, "Failed to parse expiry date: #{string.inspect}" diff --git a/lib/umbrellio_utils/click_house/backends/base.rb 
b/lib/umbrellio_utils/click_house/backends/base.rb index 9aeb44a..3ff271a 100644 --- a/lib/umbrellio_utils/click_house/backends/base.rb +++ b/lib/umbrellio_utils/click_house/backends/base.rb @@ -122,7 +122,7 @@ def populate_temp_table!(temp_table_name, dataset, schema: "public") end def with_temp_table( - dataset, temp_table_name:, primary_key: [:id], primary_key_types: [:integer], **opts, & + dataset, temp_table_name:, primary_key: [:id], primary_key_types: [:integer], **, & ) unless DB.table_exists?(temp_table_name) UmbrellioUtils::Database.create_temp_table( @@ -130,7 +130,7 @@ def with_temp_table( ) populate_temp_table!(temp_table_name, dataset) end - UmbrellioUtils::Database.with_temp_table(nil, primary_key:, temp_table_name:, **opts, &) + UmbrellioUtils::Database.with_temp_table(nil, primary_key:, temp_table_name:, **, &) end protected diff --git a/lib/umbrellio_utils/click_house/backends/legacy.rb b/lib/umbrellio_utils/click_house/backends/legacy.rb index 87e61e7..68e825f 100644 --- a/lib/umbrellio_utils/click_house/backends/legacy.rb +++ b/lib/umbrellio_utils/click_house/backends/legacy.rb @@ -27,8 +27,8 @@ def query_value(dataset, host: nil, **opts) log_errors(sql) { select_value(sql, host:, **opts) } end - def query_each(dataset, host: nil, **opts, &) - query(dataset, host:, **opts).each(&) + def query_each(dataset, host: nil, **, &) + query(dataset, host:, **).each(&) end def insert(table_name, db_name: self.db_name, rows: []) diff --git a/lib/umbrellio_utils/control.rb b/lib/umbrellio_utils/control.rb index 89ffb57..7c09967 100644 --- a/lib/umbrellio_utils/control.rb +++ b/lib/umbrellio_utils/control.rb @@ -45,8 +45,8 @@ def retry_on_unique_violation( end end - def run_non_critical(rescue_all: false, in_transaction: false, &block) - in_transaction ? DB.transaction(savepoint: true, &block) : yield + def run_non_critical(rescue_all: false, in_transaction: false, &) + in_transaction ? DB.transaction(savepoint: true, &) : yield rescue (rescue_all ? 
Exception : StandardError) => e Exceptions.notify!(e) nil diff --git a/lib/umbrellio_utils/database.rb b/lib/umbrellio_utils/database.rb index 2d1c1d7..e367a18 100644 --- a/lib/umbrellio_utils/database.rb +++ b/lib/umbrellio_utils/database.rb @@ -4,8 +4,11 @@ module UmbrellioUtils module Database extend self - HandledConstaintError = Class.new(StandardError) - InvalidPkError = Class.new(StandardError) + class HandledConstaintError < StandardError + end + + class InvalidPkError < StandardError + end def handle_constraint_error(constraint_name, &) DB.transaction(savepoint: true, &) diff --git a/lib/umbrellio_utils/http_client.rb b/lib/umbrellio_utils/http_client.rb index 9b866c4..7c55e84 100644 --- a/lib/umbrellio_utils/http_client.rb +++ b/lib/umbrellio_utils/http_client.rb @@ -6,16 +6,16 @@ module UmbrellioUtils class HTTPClient include Singleton - def perform(*args, **kwargs) - client.perform(*args, **kwargs) + def perform(*, **) + client.perform(*, **) end - def perform!(*args, **kwargs) - client.perform!(*args, **kwargs) + def perform!(*, **) + client.perform!(*, **) end - def request(*args, **kwargs) - client.request(*args, **kwargs) + def request(*, **) + client.request(*, **) end private diff --git a/lib/umbrellio_utils/sql.rb b/lib/umbrellio_utils/sql.rb index bd8ae75..e5771d7 100644 --- a/lib/umbrellio_utils/sql.rb +++ b/lib/umbrellio_utils/sql.rb @@ -6,8 +6,8 @@ module SQL UniqueConstraintViolation = Sequel::UniqueConstraintViolation - def [](*args) - Sequel[*args] + def [](*) + Sequel[*] end def func(...) @@ -47,8 +47,8 @@ def or(*conditions) Sequel.|(*Array(conditions.flatten.presence || true)) end - def pg_range(from_value, to_value, **opts) - Sequel::Postgres::PGRange.new(from_value, to_value, **opts) + def pg_range(from_value, to_value, **) + Sequel::Postgres::PGRange.new(from_value, to_value, **) end def pg_range_by_range(range) @@ -79,8 +79,8 @@ def count(expr = nil) expr ? 
func(:count, expr) : func(:count).* end - def ch_count(*args) - Sequel.function(:count, *args) + def ch_count(*) + Sequel.function(:count, *) end def avg(expr) @@ -107,8 +107,8 @@ def coalesce(*exprs) func(:coalesce, *exprs) end - def coalesce0(*args) - coalesce(*args, 0) + def coalesce0(*) + coalesce(*, 0) end def nullif(main_expr, checking_expr) diff --git a/spec/support/database.rb b/spec/support/database.rb index fc4a558..9903f35 100644 --- a/spec/support/database.rb +++ b/spec/support/database.rb @@ -11,8 +11,7 @@ host: "localhost", port: 5432, ) -rescue Sequel::DatabaseConnectionError => error - puts error +rescue Sequel::DatabaseConnectionError abort "You probably need to create a test database. " \ "Try running the following command: `createdb #{db_name}`" end diff --git a/spec/umbrellio_utils/clickhouse_spec.rb b/spec/umbrellio_utils/clickhouse_spec.rb index 6698aab..6758725 100644 --- a/spec/umbrellio_utils/clickhouse_spec.rb +++ b/spec/umbrellio_utils/clickhouse_spec.rb @@ -58,13 +58,13 @@ specify do expect(ch.describe_table("test")).to eq( [ - codec_expression: "", - comment: "", - default_expression: "", - default_type: "", - name: "id", - ttl_expression: "", - type: "Int32", + { codec_expression: "", + comment: "", + default_expression: "", + default_type: "", + name: "id", + ttl_expression: "", + type: "Int32" }, ], ) end diff --git a/umbrellio_utils.gemspec b/umbrellio_utils.gemspec index d46ce19..90f41e7 100644 --- a/umbrellio_utils.gemspec +++ b/umbrellio_utils.gemspec @@ -12,7 +12,7 @@ Gem::Specification.new do |spec| spec.description = "UmbrellioUtils is collection of utility classes and helpers" spec.homepage = "https://github.com/umbrellio/utils" spec.license = "MIT" - spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0") + spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0") spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = "https://github.com/umbrellio/utils" From 
5047f73ed4e5fd54e1a7208046fe20ff45a05d12 Mon Sep 17 00:00:00 2001 From: Yuri Smirnov Date: Wed, 29 Apr 2026 12:33:37 +0300 Subject: [PATCH 5/7] wip --- bin/clickhouse-server | 3 +-- spec/support/database.rb | 4 ++-- spec/umbrellio_utils/misc_spec.rb | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/bin/clickhouse-server b/bin/clickhouse-server index 221c58b..e5a4fe5 100755 --- a/bin/clickhouse-server +++ b/bin/clickhouse-server @@ -9,6 +9,7 @@ docker run \ --network host \ --name clickhouse-server \ --ulimit nofile=262144:262144 \ + -e CLICKHOUSE_DB=umbrellio_utils_test \ $CLICKHOUSE_IMAGE_TAG # Wait for ClickHouse server to become available @@ -16,5 +17,3 @@ until docker exec clickhouse-server clickhouse-client --query "SELECT 1" &>/dev/ echo "Waiting for ClickHouse to be ready..." sleep 1 done - -rails ch:create ch:migrate diff --git a/spec/support/database.rb b/spec/support/database.rb index 9903f35..24ddea1 100644 --- a/spec/support/database.rb +++ b/spec/support/database.rb @@ -6,8 +6,8 @@ db_name = "umbrellio_utils_test" DB = Sequel.postgres( "umbrellio_utils_test", - user: ENV.fetch("PGUSER"), - password: ENV.fetch("PGPASSWORD"), + user: ENV.fetch("PGUSER", Etc.getlogin), + password: ENV.fetch("PGPASSWORD", ""), host: "localhost", port: 5432, ) diff --git a/spec/umbrellio_utils/misc_spec.rb b/spec/umbrellio_utils/misc_spec.rb index 32a80eb..bd60515 100644 --- a/spec/umbrellio_utils/misc_spec.rb +++ b/spec/umbrellio_utils/misc_spec.rb @@ -44,7 +44,7 @@ def run! 
}, event: :table_sync, exchange_name: nil, - headers: {}, + headers: { compress: false }, realtime: true, routing_key: :umbrellio_utils, } From c6ad5788d55e195c938ce8cb65daff21ebe6ee84 Mon Sep 17 00:00:00 2001 From: Yuri Smirnov Date: Wed, 29 Apr 2026 12:34:30 +0300 Subject: [PATCH 6/7] wip --- Gemfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Gemfile b/Gemfile index 9a00535..c081843 100644 --- a/Gemfile +++ b/Gemfile @@ -9,10 +9,7 @@ gem "activesupport" gem "bundler" gem "ci-helper" gem "click_house", github: "umbrellio/click_house", branch: "master" -# clickhouse-native requires Ruby >= 3.3; gate so 3.1/3.2 CI still bundles. -install_if -> { Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.3.0") } do - gem "clickhouse-native" -end +gem "clickhouse-native" gem "csv" gem "http" gem "net-pop" From 1a47a07d74b3bc02fbdbb807c599d697e86535da Mon Sep 17 00:00:00 2001 From: Yuri Smirnov Date: Wed, 29 Apr 2026 12:35:39 +0300 Subject: [PATCH 7/7] wip --- spec/support/database.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/support/database.rb b/spec/support/database.rb index 24ddea1..ba636f0 100644 --- a/spec/support/database.rb +++ b/spec/support/database.rb @@ -11,8 +11,8 @@ host: "localhost", port: 5432, ) -rescue Sequel::DatabaseConnectionError - abort "You probably need to create a test database. " \ +rescue Sequel::DatabaseConnectionError => error + abort "You probably need to create a test database. Error: #{error.inspect}. " \ "Try running the following command: `createdb #{db_name}`" end