Skip to content

Commit

Permalink
Merge pull request #7 from codecov/matt/replace-uuid-and-batch-insert
Browse files Browse the repository at this point in the history
replace UUIDs, implement batch insert
  • Loading branch information
matt-codecov authored Jul 29, 2024
2 parents 553307f + c434444 commit c0a4b65
Show file tree
Hide file tree
Showing 22 changed files with 3,175 additions and 1,666 deletions.
48 changes: 37 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ version = "0.1.0"
edition = "2021"

[dependencies]
rusqlite = { version = "0.30.0", features = ["bundled", "uuid"] }
rusqlite = { version = "0.30.0", features = ["bundled", "limits"] }
rusqlite_migration = { version = "1.1.0", features = ["from-directory"] }
uuid = { version = "1.8.0", features = ["v4"] }
rand = "0.8.5"

# SeaHash chosen due to:
# - widely used
Expand Down
149 changes: 104 additions & 45 deletions migrations/01-init/up.sql
Original file line number Diff line number Diff line change
@@ -1,76 +1,135 @@
-- See `src/report/models.rs` for complete, up-to-date schema documentation.

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- Lookup table of source file paths; other tables point at it through
-- their `source_file_id` columns.
CREATE TABLE source_file (
    -- This should be set to the hash of the `path` column so that we can
    -- distribute processing across multiple different hosts and they will
    -- all come up with the same ID.
    id INTEGER PRIMARY KEY,

    -- Path of the source file. Required.
    path VARCHAR NOT NULL
);

-- TODO: Allow distinguishing between raw reports within a single upload
-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- One row per raw upload. Every column other than `id` is optional.
CREATE TABLE raw_upload (
    -- Expected to be a random 64-bit integer chosen by the application, so
    -- that multiple hosts can process uploads in parallel without
    -- contending over autoincremented ID values.
    id              INTEGER PRIMARY KEY,

    timestamp       INTEGER,
    raw_upload_url  VARCHAR,
    flags           VARCHAR,  -- JSON
    provider        VARCHAR,
    build           VARCHAR,
    name            VARCHAR,
    job_name        VARCHAR,
    ci_run_url      VARCHAR,
    state           VARCHAR,
    env             VARCHAR,
    session_type    VARCHAR,
    session_extras  VARCHAR   -- JSON
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- Named contexts; `context_assoc` rows refer to these by `id`.
CREATE TABLE context (
    -- Expected to be the hash of the `name` column, so that every host
    -- processing in parallel derives the same ID for the same context.
    id            INTEGER PRIMARY KEY,

    context_type  VARCHAR NOT NULL,
    name          VARCHAR NOT NULL
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- Associates a context with a sample and/or span belonging to a raw upload.
--
-- NOTE(review): `local_sample_id` and `local_span_id` are nullable yet part
-- of the primary key. SQLite permits NULLs in the primary key of a rowid
-- table and treats each NULL as distinct, so rows that differ only in
-- NULL-valued key columns are not deduplicated -- confirm this is intended.
CREATE TABLE context_assoc (
    context_id       INTEGER NOT NULL REFERENCES context(id),

    raw_upload_id    INTEGER NOT NULL,
    local_sample_id  INTEGER,
    local_span_id    INTEGER,

    -- TODO: Figure out how to re-enable these
    -- FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
    -- FOREIGN KEY (raw_upload_id, local_span_id) REFERENCES span_data(raw_upload_id, local_span_id),

    PRIMARY KEY (context_id, raw_upload_id, local_sample_id, local_span_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- One coverage measurement for a line of a source file within a raw upload.
-- Rows are identified by (raw_upload_id, local_sample_id); `branches_data`,
-- `method_data` and `span_data` reference that pair.
CREATE TABLE coverage_sample (
    raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,

    -- This should be an application-managed auto-incremented integer.
    local_sample_id INTEGER NOT NULL,

    source_file_id INTEGER REFERENCES source_file(id) NOT NULL,
    line_no INTEGER NOT NULL,

    coverage_type VARCHAR NOT NULL,

    -- Optional counters; any of them may be NULL.
    hits INTEGER,
    hit_branches INTEGER,
    total_branches INTEGER,

    PRIMARY KEY (raw_upload_id, local_sample_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- Per-branch data attached to a coverage sample of the same raw upload.
-- Rows are identified by (raw_upload_id, local_branch_id).
CREATE TABLE branches_data (
    raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,

    -- Together with `raw_upload_id`, identifies the owning `coverage_sample`
    -- row (see the composite foreign key below).
    local_sample_id INTEGER NOT NULL,

    -- This should be an application-managed auto-incremented integer.
    local_branch_id INTEGER NOT NULL,

    source_file_id INTEGER REFERENCES source_file(id) NOT NULL,

    hits INTEGER NOT NULL,
    branch_format VARCHAR NOT NULL,
    branch VARCHAR NOT NULL,

    FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
    PRIMARY KEY (raw_upload_id, local_branch_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- Per-method data attached to a coverage sample of the same raw upload.
-- Rows are identified by (raw_upload_id, local_method_id).
CREATE TABLE method_data (
    raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,

    -- Together with `raw_upload_id`, identifies the owning `coverage_sample`
    -- row (see the composite foreign key below).
    local_sample_id INTEGER NOT NULL,

    -- This should be an application-managed auto-incremented integer.
    local_method_id INTEGER NOT NULL,

    source_file_id INTEGER REFERENCES source_file(id) NOT NULL,
    line_no INTEGER,

    -- Optional counters; any of them may be NULL.
    hit_branches INTEGER,
    total_branches INTEGER,
    hit_complexity_paths INTEGER,
    total_complexity INTEGER,

    FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
    PRIMARY KEY (raw_upload_id, local_method_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- Hit counts for a span of a source file (start/end line and column) within
-- a raw upload. Rows are identified by (raw_upload_id, local_span_id).
CREATE TABLE span_data (
    raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,

    -- Nullable, unlike in `branches_data`/`method_data`; presumably a span
    -- does not have to belong to a coverage sample -- TODO confirm.
    local_sample_id INTEGER,

    -- This should be an application-managed auto-incremented integer.
    local_span_id INTEGER NOT NULL,

    source_file_id INTEGER REFERENCES source_file(id) NOT NULL,

    hits INTEGER NOT NULL,

    -- Span boundaries; each of them may be NULL.
    start_line INTEGER,
    start_col INTEGER,
    end_line INTEGER,
    end_col INTEGER,

    FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
    PRIMARY KEY (raw_upload_id, local_span_id)
);
Loading

0 comments on commit c0a4b65

Please sign in to comment.