From 41a6b495ffdf8141e375e001ef8ee5c3d1452bdd Mon Sep 17 00:00:00 2001 From: Anna Selivanova Date: Mon, 17 Jan 2022 10:49:25 +0400 Subject: [PATCH 1/6] Added support for ClickHouse --- README.md | 11 ++++ lib/blazer.rb | 2 + lib/blazer/adapters/clickhouse_adapter.rb | 70 +++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 lib/blazer/adapters/clickhouse_adapter.rb diff --git a/README.md b/README.md index 88b69a126..718e8c196 100644 --- a/README.md +++ b/README.md @@ -566,6 +566,7 @@ data_sources: - [Apache Ignite](#apache-ignite) - [Apache Spark](#apache-spark) - [Cassandra](#cassandra) +- [ClickHouse](#clickhouse) - [Druid](#druid) - [Elasticsearch](#elasticsearch) - [Google BigQuery](#google-bigquery) @@ -713,6 +714,16 @@ data_sources: url: cassandra://user:password@hostname:9042/keyspace ``` +### ClickHouse + +Add [ClickHouse Ruby driver](https://github.com/shlima/click_house) to your Gemfile and set: +```yml +data_sources: + my_source: + adapter: clickhouse + url: http://user:password@hostname:8123/database +``` + ### Druid Enable [SQL support](http://druid.io/docs/latest/querying/sql.html#configuration) on the broker and set: diff --git a/lib/blazer.rb b/lib/blazer.rb index 0c44ed89e..795aef65e 100644 --- a/lib/blazer.rb +++ b/lib/blazer.rb @@ -15,6 +15,7 @@ require "blazer/adapters/athena_adapter" require "blazer/adapters/bigquery_adapter" require "blazer/adapters/cassandra_adapter" +require "blazer/adapters/clickhouse_adapter" require "blazer/adapters/drill_adapter" require "blazer/adapters/druid_adapter" require "blazer/adapters/elasticsearch_adapter" @@ -254,6 +255,7 @@ def self.archive_queries Blazer.register_adapter "athena", Blazer::Adapters::AthenaAdapter Blazer.register_adapter "bigquery", Blazer::Adapters::BigQueryAdapter Blazer.register_adapter "cassandra", Blazer::Adapters::CassandraAdapter +Blazer.register_adapter "clickhouse", Blazer::Adapters::ClickhouseAdapter Blazer.register_adapter "drill", Blazer::Adapters::DrillAdapter Blazer.register_adapter "druid", Blazer::Adapters::DruidAdapter Blazer.register_adapter "elasticsearch", Blazer::Adapters::ElasticsearchAdapter diff --git a/lib/blazer/adapters/clickhouse_adapter.rb b/lib/blazer/adapters/clickhouse_adapter.rb new file mode 100644 index 000000000..e4467dba3 --- /dev/null +++ b/lib/blazer/adapters/clickhouse_adapter.rb @@ -0,0 +1,70 @@ +module Blazer + module Adapters + class ClickhouseAdapter < BaseAdapter + def run_statement(statement, _comment) + columns = [] + rows = [] + error = nil + + begin + response = connection.post(query: { query: statement, default_format: "CSVWithNames" }) + rows = response.body + columns = rows.shift + rescue => e + error = e.message + end + + [columns, rows, error] + end + + def tables + connection.tables + end + + def schema + statement = <<-SQL + SELECT table, name, type + FROM system.columns + WHERE database = '#{connection.config.database}' + ORDER BY table, position + SQL + + response = connection.post(query: { query: statement, default_format: "CSV" }) + response.body + .group_by { |row| row[0] } + .transform_values { |columns| columns.map { |c| { name: c[1], data_type: c[2] } } } + .map { |table, columns| { schema: "public", table: table, columns: columns } } + end + + def preview_statement + "SELECT * FROM {table} LIMIT 10" + end + + def explain(statement) + connection.explain(statement) + end + + protected + + def connection + @connection ||= ClickHouse::Connection.new(config) + end + + def config + @config ||= begin + uri = URI.parse(settings["url"]) + options = { + scheme: uri.scheme, + host: uri.host, + port: uri.port, + username: uri.user, + password: uri.password, + database: uri.path.split("/").last + }.compact + + ClickHouse::Config.new(**options) + end + end + end + end +end From 3910dee5f8e177491aabe2d1a914ba69186fa740 Mon Sep 17 00:00:00 2001 From: Anna Selivanova Date: Thu, 20 Jan 2022 17:02:07 +0400 Subject: [PATCH 2/6] Add support for ActiveRecord driver for ClickHouse --- README.md | 2 +- lib/blazer/adapters/clickhouse_adapter.rb | 82 ++++++++++++++++++----- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 718e8c196..a4a8a4ac6 100644 --- a/README.md +++ b/README.md @@ -716,7 +716,7 @@ data_sources: ### ClickHouse -Add [ClickHouse Ruby driver](https://github.com/shlima/click_house) to your Gemfile and set: +Add [ClickHouse Ruby driver](https://github.com/shlima/click_house) OR [Clickhouse::Activerecord](https://github.com/PNixx/clickhouse-activerecord) to your Gemfile and set: ```yml data_sources: my_source: diff --git a/lib/blazer/adapters/clickhouse_adapter.rb b/lib/blazer/adapters/clickhouse_adapter.rb index e4467dba3..dcbad4b28 100644 --- a/lib/blazer/adapters/clickhouse_adapter.rb +++ b/lib/blazer/adapters/clickhouse_adapter.rb @@ -1,14 +1,55 @@ module Blazer module Adapters class ClickhouseAdapter < BaseAdapter + SUPPORTED_DRIVERS_MAPPING = { + "click_house" => ClickHouseDriver, + "clickhouse-activerecord" => ClickhouseActiverecordDriver + }.freeze + + delegate :tables, to: :driver + + # Wrapper for ClickHouse Ruby driver (https://github.com/shlima/click_house) + class ClickHouseDriver + delegate :tables, to: :connection + + def initialize(config) + @config = ClickHouse::Config.new(**config) + end + + def connection + @connection ||= ClickHouse::Connection.new(@config) + end + + def execute(statement, format) + connection.post(query: { query: statement, default_format: format }).body + end + end + + # Wrapper for Clickhouse::Activerecord driver (https://github.com/PNixx/clickhouse-activerecord) + class ClickhouseActiverecordDriver + delegate :tables, to: :connection + + def initialize(config) + @config = config + end + + def connection + @connection ||= ActiveRecord::Base.clickhouse_connection(@config) + end + + def execute(statement, format) + body = connection.do_execute(statement, format: format) + format.in?(%w[CSV CSVWithNames]) ? CSV.parse(body) : body + end + end + def run_statement(statement, _comment) columns = [] rows = [] error = nil begin - response = connection.post(query: { query: statement, default_format: "CSVWithNames" }) - rows = response.body + rows = driver.execute(statement, "CSVWithNames") columns = rows.shift rescue => e error = e.message @@ -17,23 +58,18 @@ def run_statement(statement, _comment) [columns, rows, error] end - def tables - connection.tables - end - def schema statement = <<-SQL SELECT table, name, type FROM system.columns - WHERE database = '#{connection.config.database}' + WHERE database = '#{config[:database]}' ORDER BY table, position SQL - response = connection.post(query: { query: statement, default_format: "CSV" }) - response.body - .group_by { |row| row[0] } - .transform_values { |columns| columns.map { |c| { name: c[1], data_type: c[2] } } } - .map { |table, columns| { schema: "public", table: table, columns: columns } } + rows = driver.execute(statement, "CSV") + rows.group_by { |row| row[0] } + .transform_values { |columns| columns.map { |c| { name: c[1], data_type: c[2] } } } + .map { |table, columns| { schema: "public", table: table, columns: columns } } end def preview_statement @@ -41,19 +77,24 @@ def preview_statement end def explain(statement) - connection.explain(statement) + driver.execute("EXPLAIN #{statement.gsub(/\A(\s*EXPLAIN)/io, '')}", "TSV") end protected - def connection - @connection ||= ClickHouse::Connection.new(config) + def driver + @driver ||= begin + driver = SUPPORTED_DRIVERS_MAPPING.keys.find { |driver| installed?(driver) } + raise Blazer::Error, "ClickHouse driver not installed!" unless driver + + SUPPORTED_DRIVERS_MAPPING[driver].new(config) + end end def config @config ||= begin uri = URI.parse(settings["url"]) - options = { + { scheme: uri.scheme, host: uri.host, port: uri.port, @@ -61,10 +102,15 @@ def config password: uri.password, database: uri.path.split("/").last }.compact - - ClickHouse::Config.new(**options) end end + + def installed?(driver_name) + Gem::Specification.find_by_name(driver_name) + true + rescue Gem::LoadError + false + end end end end From cc7fe03bda047db3a31cfcc1a8580257ac845ec1 Mon Sep 17 00:00:00 2001 From: Anna Selivanova Date: Sun, 23 Jan 2022 21:33:54 +0400 Subject: [PATCH 3/6] Fix Date and Time columns typecasting --- lib/blazer/adapters/clickhouse_adapter.rb | 81 ++++++++++++----------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/lib/blazer/adapters/clickhouse_adapter.rb b/lib/blazer/adapters/clickhouse_adapter.rb index dcbad4b28..9fd15707b 100644 --- a/lib/blazer/adapters/clickhouse_adapter.rb +++ b/lib/blazer/adapters/clickhouse_adapter.rb @@ -2,55 +2,21 @@ module Blazer module Adapters class ClickhouseAdapter < BaseAdapter SUPPORTED_DRIVERS_MAPPING = { - "click_house" => ClickHouseDriver, - "clickhouse-activerecord" => ClickhouseActiverecordDriver + "click_house" => "ClickHouseDriver", + "clickhouse-activerecord" => "ClickhouseActiverecordDriver" }.freeze delegate :tables, to: :driver - # Wrapper for ClickHouse Ruby driver (https://github.com/shlima/click_house) - class ClickHouseDriver - delegate :tables, to: :connection - - def initialize(config) - @config = ClickHouse::Config.new(**config) - end - - def connection - @connection ||= ClickHouse::Connection.new(@config) - end - - def execute(statement, format) - connection.post(query: { query: statement, default_format: format }).body - end - end - - # Wrapper for Clickhouse::Activerecord driver (https://github.com/PNixx/clickhouse-activerecord) - class ClickhouseActiverecordDriver - delegate :tables, to: :connection - - def initialize(config) - @config = config - end - - def connection - @connection ||= ActiveRecord::Base.clickhouse_connection(@config) - end - - def execute(statement, format) - body = connection.do_execute(statement, format: format) - format.in?(%w[CSV CSVWithNames]) ? CSV.parse(body) : body - end - end - def run_statement(statement, _comment) columns = [] rows = [] error = nil begin - rows = driver.execute(statement, "CSVWithNames") - columns = rows.shift + data = driver.select_all(statement) + columns = data.first.keys + rows = data.map(&:values) rescue => e error = e.message end @@ -87,7 +53,7 @@ def driver driver = SUPPORTED_DRIVERS_MAPPING.keys.find { |driver| installed?(driver) } raise Blazer::Error, "ClickHouse driver not installed!" unless driver - SUPPORTED_DRIVERS_MAPPING[driver].new(config) + "Blazer::Adapters::#{SUPPORTED_DRIVERS_MAPPING[driver]}".constantize.new(config) end end @@ -112,5 +78,40 @@ def installed?(driver_name) false end end + + # Wrapper for ClickHouse Ruby driver (https://github.com/shlima/click_house) + class ClickHouseDriver + delegate :tables, :select_all, to: :connection + + def initialize(config) + @config = ClickHouse::Config.new(**config) + end + + def connection + @connection ||= ClickHouse::Connection.new(@config) + end + + def execute(statement, format) + connection.post(query: { query: statement, default_format: format }).body + end + end + + # Wrapper for Clickhouse::Activerecord driver (https://github.com/PNixx/clickhouse-activerecord) + class ClickhouseActiverecordDriver + delegate :tables, :select_all, to: :connection + + def initialize(config) + @config = config.merge(ssl: config[:port] == 8443) + end + + def connection + @connection ||= ActiveRecord::Base.clickhouse_connection(@config) + end + + def execute(statement, format) + body = connection.do_execute(statement, format: format) + format == "CSV" ? CSV.parse(body) : body + end + end end end From 57d08daeca26af6631095d62e45b006ba5a177c9 Mon Sep 17 00:00:00 2001 From: Anna Selivanova Date: Sun, 6 Feb 2022 21:35:46 +0400 Subject: [PATCH 4/6] Drop clickhouse-activerecord support; extend config --- README.md | 5 +- lib/blazer/adapters/clickhouse_adapter.rb | 83 +++++------------------ 2 files changed, 22 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index a4a8a4ac6..50fc175b6 100644 --- a/README.md +++ b/README.md @@ -716,12 +716,15 @@ data_sources: ### ClickHouse -Add [ClickHouse Ruby driver](https://github.com/shlima/click_house) OR [Clickhouse::Activerecord](https://github.com/PNixx/clickhouse-activerecord) to your Gemfile and set: +Add [ClickHouse Ruby driver](https://github.com/shlima/click_house) to your Gemfile and set: ```yml data_sources: my_source: adapter: clickhouse url: http://user:password@hostname:8123/database + + # optional settings + ssl_verify: true # false by default ``` ### Druid diff --git a/lib/blazer/adapters/clickhouse_adapter.rb b/lib/blazer/adapters/clickhouse_adapter.rb index 9fd15707b..5f1cee677 100644 --- a/lib/blazer/adapters/clickhouse_adapter.rb +++ b/lib/blazer/adapters/clickhouse_adapter.rb @@ -1,20 +1,13 @@ module Blazer module Adapters class ClickhouseAdapter < BaseAdapter - SUPPORTED_DRIVERS_MAPPING = { - "click_house" => "ClickHouseDriver", - "clickhouse-activerecord" => "ClickhouseActiverecordDriver" - }.freeze - - delegate :tables, to: :driver - def run_statement(statement, _comment) columns = [] rows = [] error = nil begin - data = driver.select_all(statement) + data = connection.select_all(statement) columns = data.first.keys rows = data.map(&:values) rescue => e @@ -24,18 +17,23 @@ def run_statement(statement, _comment) [columns, rows, error] end + def tables + connection.tables + end + def schema statement = <<-SQL SELECT table, name, type FROM system.columns - WHERE database = '#{config[:database]}' + WHERE database = currentDatabase() ORDER BY table, position SQL - rows = driver.execute(statement, "CSV") - rows.group_by { |row| row[0] } - .transform_values { |columns| columns.map { |c| { name: c[1], data_type: c[2] } } } - .map { |table, columns| { schema: "public", table: table, columns: columns } } + response = connection.post(query: { query: statement, default_format: "CSV" }) + response.body + .group_by { |row| row[0] } + .transform_values { |columns| columns.map { |c| { name: c[1], data_type: c[2] } } } + .map { |table, columns| { schema: "public", table: table, columns: columns } } end def preview_statement @@ -43,75 +41,30 @@ def preview_statement end def explain(statement) - driver.execute("EXPLAIN #{statement.gsub(/\A(\s*EXPLAIN)/io, '')}", "TSV") + connection.explain(statement) end protected - def driver - @driver ||= begin - driver = SUPPORTED_DRIVERS_MAPPING.keys.find { |driver| installed?(driver) } - raise Blazer::Error, "ClickHouse driver not installed!" unless driver - - "Blazer::Adapters::#{SUPPORTED_DRIVERS_MAPPING[driver]}".constantize.new(config) - end + def connection + @connection ||= ClickHouse::Connection.new(config) end def config @config ||= begin uri = URI.parse(settings["url"]) - { + options = { scheme: uri.scheme, host: uri.host, port: uri.port, username: uri.user, password: uri.password, - database: uri.path.split("/").last + database: uri.path.sub(/\A\//, ""), + ssl_verify: settings.fetch("ssl_verify", false) }.compact + ClickHouse::Config.new(**options) end end - - def installed?(driver_name) - Gem::Specification.find_by_name(driver_name) - true - rescue Gem::LoadError - false - end - end - - # Wrapper for ClickHouse Ruby driver (https://github.com/shlima/click_house) - class ClickHouseDriver - delegate :tables, :select_all, to: :connection - - def initialize(config) - @config = ClickHouse::Config.new(**config) - end - - def connection - @connection ||= ClickHouse::Connection.new(@config) - end - - def execute(statement, format) - connection.post(query: { query: statement, default_format: format }).body - end - end - - # Wrapper for Clickhouse::Activerecord driver (https://github.com/PNixx/clickhouse-activerecord) - class ClickhouseActiverecordDriver - delegate :tables, :select_all, to: :connection - - def initialize(config) - @config = config.merge(ssl: config[:port] == 8443) - end - - def connection - @connection ||= ActiveRecord::Base.clickhouse_connection(@config) - end - - def execute(statement, format) - body = connection.do_execute(statement, format: format) - format == "CSV" ? CSV.parse(body) : body - end end end end From dc4b17f975cfb1e301a3c5fe7f6da194b6a72a80 Mon Sep 17 00:00:00 2001 From: Anna Selivanova Date: Thu, 10 Feb 2022 00:20:39 +0400 Subject: [PATCH 5/6] Convert DateTime columns to Time objects --- lib/blazer/adapters/clickhouse_adapter.rb | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/blazer/adapters/clickhouse_adapter.rb b/lib/blazer/adapters/clickhouse_adapter.rb index 5f1cee677..636e67756 100644 --- a/lib/blazer/adapters/clickhouse_adapter.rb +++ b/lib/blazer/adapters/clickhouse_adapter.rb @@ -1,15 +1,22 @@ module Blazer module Adapters class ClickhouseAdapter < BaseAdapter + DATE_TIME_TYPES = ["DateTime", "DateTime(%s)", "DateTime64(%d, %s)"].freeze + def run_statement(statement, _comment) columns = [] rows = [] error = nil begin - data = connection.select_all(statement) - columns = data.first.keys - rows = data.map(&:values) + result = connection.select_all(statement) + unless result.data.blank? + date_time_columns = result.meta + .select { |column| column["type"].in?(DATE_TIME_TYPES) } + .map { |column| column["name"] } + columns = result.data.first.keys + rows = result.data.map { |row| convert_time_columns(row, date_time_columns).values } + end rescue => e error = e.message end @@ -65,6 +72,13 @@ def config ClickHouse::Config.new(**options) end end + + def convert_time_columns(row, date_time_columns) + time_values = row.slice(*date_time_columns).transform_values!(&:to_time) + row.merge(time_values) + rescue NoMethodError + row + end end end end From bfba2a43ce520195b056a3aa9f4c23e04740f2aa Mon Sep 17 00:00:00 2001 From: Anna Selivanova Date: Sun, 20 Feb 2022 18:20:11 +0400 Subject: [PATCH 6/6] Fix charts --- lib/blazer/adapters/clickhouse_adapter.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/blazer/adapters/clickhouse_adapter.rb b/lib/blazer/adapters/clickhouse_adapter.rb index 636e67756..76698f2f7 100644 --- a/lib/blazer/adapters/clickhouse_adapter.rb +++ b/lib/blazer/adapters/clickhouse_adapter.rb @@ -14,8 +14,8 @@ def run_statement(statement, _comment) date_time_columns = result.meta .select { |column| column["type"].in?(DATE_TIME_TYPES) } .map { |column| column["name"] } - columns = result.data.first.keys - rows = result.data.map { |row| convert_time_columns(row, date_time_columns).values } + columns = result.first.keys + rows = result.map { |row| convert_time_columns(row, date_time_columns).values } end rescue => e error = e.message