Skip to content

Commit

Permalink
Merge pull request #4 from rinsed-org/feat/add-specs
Browse files Browse the repository at this point in the history
Specs
  • Loading branch information
reidnimz authored Nov 21, 2023
2 parents a0b2fa9 + 6b7f85e commit 825febb
Show file tree
Hide file tree
Showing 6 changed files with 287 additions and 1 deletion.
7 changes: 7 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,10 @@ gem "concurrent-ruby"
gem "connection_pool"
gem "jwt"
gem "oj"
# Test runner for the examples under spec/.
gem "rspec"
# Interactive debugger; spec/spec_helper.rb requires it.
gem "pry"
# Loads environment variables from a .env file; spec/spec_helper.rb requires "dotenv/load".
gem "dotenv"

# Only needed by the threaded example in the client spec, which requires it lazily.
group :development do
gem "parallel"
end
25 changes: 25 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,32 @@ PATH
GEM
remote: https://rubygems.org/
specs:
coderay (1.1.3)
concurrent-ruby (1.2.2)
connection_pool (2.4.1)
diff-lcs (1.5.0)
dotenv (2.8.1)
jwt (2.7.1)
method_source (1.0.0)
oj (3.16.1)
parallel (1.23.0)
pry (0.14.2)
coderay (~> 1.1)
method_source (~> 1.0)
rake (13.1.0)
rspec (3.12.0)
rspec-core (~> 3.12.0)
rspec-expectations (~> 3.12.0)
rspec-mocks (~> 3.12.0)
rspec-core (3.12.2)
rspec-support (~> 3.12.0)
rspec-expectations (3.12.3)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.12.0)
rspec-mocks (3.12.6)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.12.0)
rspec-support (3.12.1)

PLATFORMS
arm64-darwin-21
Expand All @@ -20,10 +41,14 @@ DEPENDENCIES
bundler
concurrent-ruby
connection_pool
dotenv
jwt
oj
parallel
pry
rake
rb_snowflake_client!
rspec

BUNDLED WITH
2.4.19
10 changes: 10 additions & 0 deletions lib/rb_snowflake_client/result_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,15 @@ def initialize(partition_count, row_type_data)
end

# Stores +value+ at +index+ in the underlying +data+ collection.
#
# Errors raised by the underlying store propagate to the caller. The previous
# implementation rescued Exception and opened a pry session, which swallowed
# every failure (including signals and exits) and blocked non-interactive runs.
#
# @param index [Integer] slot to write (presumably a partition index — confirm against caller)
# @param value [Object] value to store in that slot
# @return [Object] the assigned value
def []=(index, value)
  data[index] = value
end

# Materializes every row of every entry in +data+ as a Hash.
#
# Each raw row is passed through +wrap_row+ and converted with +to_h+; the
# per-entry arrays are concatenated into one flat array.
#
# @return [Array<Hash>] all rows, in the order they appear in +data+
def get_all_rows
  data.flat_map do |partition|
    partition.map { |raw_row| wrap_row(raw_row).to_h }
  end
end

def each
Expand All @@ -33,6 +41,8 @@ def size
data.map(&:size).sum
end

# Expose `length` as a synonym for `size`.
alias length size

# Returns the first row of the first entry in +data+, passed through +wrap_row+.
#
# @return [Object] whatever +wrap_row+ returns for that row
def first
  leading_rows = data.first
  wrap_row(leading_rows.first)
end
Expand Down
28 changes: 27 additions & 1 deletion lib/rb_snowflake_client/snowflake_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@

require_relative "result_set"

class Snowflake
  # Error type raised by the Snowflake client. Carries an optional context
  # object alongside the usual exception message.
  class Error < StandardError
    # This will get pulled through to Sentry, see:
    # https://github.com/getsentry/sentry-ruby/blob/11ecd254c0d2cae2b327f0348074e849095aa32d/sentry-ruby/lib/sentry/error_event.rb#L31-L33
    attr_reader :sentry_context

    # @param details [Hash] extra context exposed through #sentry_context
    # @param message [String, nil] exception message; when nil the message
    #   falls back to the class name, or to whatever `raise err, "msg"` supplies
    def initialize(details = {}, message = nil)
      # Call super so StandardError initializes its own state.
      super(message)
      @sentry_context = details
    end
  end
end

# TODO: double check that net/http is actually using compression like it should be
class SnowflakeClient
JWT_TOKEN_TTL = 3600 # seconds, this is the max supported by snowflake
Expand All @@ -20,6 +32,17 @@ class SnowflakeClient
MAX_THREADS = 8
THREAD_SCALE_FACTOR = 4 # partition count factor for number of threads (i.e. 2 == once we have 4 partitions, spin up a second thread)

# Builds a client from environment variables.
#
# The variables are read with ENV[], so an unset variable is passed to +new+
# as nil. Values are passed positionally in the order listed below.
#
# @return [Object] whatever +new+ returns for those six arguments
def self.connect
  settings = %w[
    SNOWFLAKE_URI
    SNOWFLAKE_PRIVATE_KEY_PATH
    SNOWFLAKE_ORGANIZATION
    SNOWFLAKE_ACCOUNT
    SNOWFLAKE_USER
    SNOWFLAKE_PUBLIC_KEY_FINGERPRINT
  ].map { |variable| ENV[variable] }

  new(*settings)
end

# TODO: parameterize warehouse
def initialize(uri, private_key_path, organization, account, user, public_key_fingerprint)
@base_uri = uri
Expand Down Expand Up @@ -99,7 +122,10 @@ def count_statements(query)
end

# Raises unless the response code is the string "200".
#
# @param response [#code, #body] HTTP response; +code+ is compared as a String
# @return [nil] when the response code is "200"
# @raise [Snowflake::Error] for any other code, with the code and body in the message
def handle_errors(response)
  return if response.code == "200"

  raise Snowflake::Error.new({}),
        "Bad response! Got code: #{response.code}, w/ message #{response.body}"
end

def request_with_auth_and_headers(connection, request_class, path, body=nil)
Expand Down
204 changes: 204 additions & 0 deletions spec/rb_snowflake_client/client_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
require "spec_helper"

RSpec.describe SnowflakeClient do
describe "#fetch" do
# Client built through SnowflakeClient.connect.
let(:client) { described_class.connect }

# Default SQL is empty; each context below overrides `query`.
let(:query) { "" }
# Lazily runs the current `query` through the client on first reference.
let(:result) { client.query(query) }

# Error-path examples: every failing query must surface as Snowflake::Error.
context "when the query errors" do
  let(:query) { "INVALID QUERY;" }

  it "should raise an exception" do
    expect { result }.to raise_error do |error|
      expect(error).to be_a Snowflake::Error
      # TODO: make sure to include query in context
      #expect(error.sentry_context).to include(
      #sql: query
      #)
    end
  end

  context "for unauthorized database" do
    let(:query) { "SELECT * FROM TEST_DATABASE.RINSED_WEB_APP.EMAILS LIMIT 1;" }

    it "should raise an exception" do
      expect { result }.to raise_error do |error|
        expect(error).to be_a Snowflake::Error
        # No debugger breakpoint here: a stray `binding.pry` would stall the suite.
        expect(error.message).to include "'TEST_DATABASE' does not exist or not authorized"
        # TODO: make sure to include query in context
        #expect(error.sentry_context).to include(
        #sql: query
        #)
      end
    end

    it "should raise the correct exception for threaded work" do
      require "parallel"

      # Each worker uses its own client and a distinct database name so the
      # message check proves errors are not mixed up between threads.
      Parallel.map((1..3).to_a, in_threads: 2) do |idx|
        c = described_class.connect
        query = "SELECT * FROM TEST_DATABASE#{idx}.RINSED_WEB_APP.EMAILS LIMIT 1;"

        expect { c.query(query) }.to raise_error do |error|
          expect(error).to be_a Snowflake::Error
          # TODO: make sure to include query in context
          #expect(error.sentry_context).to include(
          #sql: query
          #)
          expect(error.message).to include "TEST_DATABASE#{idx}"
        end
      end
    end
  end
end

# Smoke test against the trivial `SELECT 1;` statement.
context "with a simple query returning string" do
  let(:query) { "SELECT 1;" }

  # The description names the class that the assertion actually checks.
  it "should return a ResultSet" do
    expect(result).to be_a(ResultSet)
  end

  it "should respond to get_all_rows" do
    expect(result.length).to eq(1)
    rows = result.get_all_rows
    expect(rows).to eq(
      # The value should be an integer. TODO: figure out why it's not
      [{"1" => "1"}]
    )
  end

  # TODO: if we want this semantics. W/o streaming doesn't feel too great.
  #it "should respond to get_all_rows with a block" do
  #expect { |b| result.get_all_rows(&b) }.to yield_with_args({"1" => 1})
  #end
end

context "with a more complex query" do
  # Fixture table that must already exist in the Snowflake account:
  #   CREATE TABLE ruby_snowflake_client_testing.public.test_datatypes
  #     (ID int, NAME string, DOB date, CREATED_AT timestamp, COFFES_PER_WEEK float);
  # Seeded with:
  #   INSERT INTO test_datatypes
  #     VALUES (1, 'John Smith', '1990-10-17', current_timestamp(), 3.41),
  #            (2, 'Jane Smith', '1990-01-09', current_timestamp(), 3.525);
  let(:query) { "SELECT * from ruby_snowflake_client_testing.public.test_datatypes;" }

  let(:expected_john) do
    {
      "coffes_per_week" => 3.41,
      "id" => 1,
      "dob" => be_within(0.01).of(Time.new(1990, 10, 17, 0, 0, 0, 0)),
      "created_at" => be_within(0.01).of(Time.new(2023, 5, 12, 4, 22, 8, 0)),
      "name" => "John Smith",
    }
  end

  let(:expected_jane) do
    {
      "coffes_per_week" => 3.525,
      "id" => 2,
      "dob" => be_within(0.01).of(Time.new(1990, 1, 9, 0, 0, 0, 0)),
      "created_at" => be_within(0.01).of(Time.new(2023, 5, 12, 4, 22, 8, 0)),
      "name" => "Jane Smith",
    }
  end

  it "should return 2 rows with the right data types" do
    rows = result.get_all_rows
    expect(rows.length).to eq(2)

    # Rows are expected in insertion order: John first, then Jane.
    john, jane = rows
    expect(john).to match(expected_john)
    expect(jane).to match(expected_jane)
  end
end

context "with NUMBER and HighPrecision" do
  # Fixture table that must already exist in the Snowflake account:
  #   CREATE TABLE ruby_snowflake_client_testing.public.test_big_datatypes
  #     (ID NUMBER(38,0), BIGFLOAT NUMBER(8,2));
  # Seeded with:
  #   INSERT INTO test_big_datatypes VALUES (1, 8.2549);
  let(:query) { "SELECT * from ruby_snowflake_client_testing.public.test_big_datatypes;" }

  it "should return 1 row with correct data types" do
    rows = result.get_all_rows
    expect(rows.length).to eq(1)

    expected_row = {
      "id" => 1,
      "bigfloat" => 8.25, # precision of only 2 decimals
    }
    expect(rows[0]).to eq(expected_row)
  end
end

context "with a large amount of data" do
  # Fixture table that must already exist in the Snowflake account:
  #   CREATE TABLE ruby_snowflake_client_testing.public.large_table (ID int PRIMARY KEY, random_text string);
  # We then ran a couple of inserts with a large number of rows:
  #   INSERT INTO ruby_snowflake_client_testing.public.large_table
  #     SELECT random()%50000, randstr(64, random()) FROM table(generator(rowCount => 50000));

  let(:limit) { 0 }
  let(:query) { "SELECT * FROM ruby_snowflake_client_testing.public.large_table LIMIT #{limit}" }

  context "fetching 50k rows" do
    let(:limit) { 50_000 }

    it "should work" do
      rows = result.get_all_rows
      expect(rows.length).to eq 50_000
      expect(-50_000...50_000).to include(rows[0]["id"].to_i)
    end
  end

  context "fetching 150k rows x 100 times" do
    let(:limit) { 150_000 }

    it "should work" do
      # A new client is created on every iteration.
      100.times do
        fresh_client = described_class.connect
        rows = fresh_client.query(query).get_all_rows
        expect(rows.length).to eq 150_000
        expect(-50_000...50_000).to include(rows[0]["id"].to_i)
      end
    end
  end

  context "fetching 150k rows x 10 times - with threads" do
    let(:limit) { 150_000 }

    it "should work" do
      # One client per thread; all threads are started before any is joined.
      workers = Array.new(10) do
        Thread.new do
          own_client = described_class.connect
          rows = own_client.query(query).get_all_rows
          expect(rows.length).to eq 150_000
          expect(-50_000...50_000).to include(rows[0]["id"].to_i)
        end
      end

      workers.each(&:join)
    end
  end

  context "fetching 150k rows x 10 times - with threads & shared client" do
    let(:limit) { 150_000 }

    it "should work" do
      # A single client instance is used by all ten threads.
      shared_client = described_class.connect
      workers = Array.new(10) do
        Thread.new do
          rows = shared_client.query(query).get_all_rows
          expect(rows.length).to eq 150_000
          expect(-50_000...50_000).to include(rows[0]["id"].to_i)
        end
      end

      workers.each(&:join)
    end
  end
end
end
end

14 changes: 14 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
require "rb_snowflake_client"
require "rspec"
require "pry"
require "dotenv/load"

# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  # Run only examples tagged :focus, or everything when nothing is tagged.
  rspec.filter_run :focus
  rspec.run_all_when_everything_filtered = true

  # Randomize example order.
  rspec.order = "random"

  # Use rspec-mocks with only the expect-style syntax enabled.
  rspec.mock_with(:rspec) { |mocks| mocks.syntax = :expect }
end

0 comments on commit 825febb

Please sign in to comment.