Skip to content

Commit

Permalink
Merge branch 'main' into add-option-to-order-user-project-contributio…
Browse files Browse the repository at this point in the history
…ns-by-most-recently-contributed
  • Loading branch information
yuenmichelle1 committed Oct 18, 2024
2 parents 1e363dd + feff7d2 commit 2796462
Show file tree
Hide file tree
Showing 13 changed files with 366 additions and 72 deletions.
124 changes: 62 additions & 62 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,80 +1,80 @@
GEM
remote: https://rubygems.org/
specs:
actioncable (7.0.8.4)
actionpack (= 7.0.8.4)
activesupport (= 7.0.8.4)
actioncable (7.0.8.5)
actionpack (= 7.0.8.5)
activesupport (= 7.0.8.5)
nio4r (~> 2.0)
websocket-driver (>= 0.6.1)
actionmailbox (7.0.8.4)
actionpack (= 7.0.8.4)
activejob (= 7.0.8.4)
activerecord (= 7.0.8.4)
activestorage (= 7.0.8.4)
activesupport (= 7.0.8.4)
actionmailbox (7.0.8.5)
actionpack (= 7.0.8.5)
activejob (= 7.0.8.5)
activerecord (= 7.0.8.5)
activestorage (= 7.0.8.5)
activesupport (= 7.0.8.5)
mail (>= 2.7.1)
net-imap
net-pop
net-smtp
actionmailer (7.0.8.4)
actionpack (= 7.0.8.4)
actionview (= 7.0.8.4)
activejob (= 7.0.8.4)
activesupport (= 7.0.8.4)
actionmailer (7.0.8.5)
actionpack (= 7.0.8.5)
actionview (= 7.0.8.5)
activejob (= 7.0.8.5)
activesupport (= 7.0.8.5)
mail (~> 2.5, >= 2.5.4)
net-imap
net-pop
net-smtp
rails-dom-testing (~> 2.0)
actionpack (7.0.8.4)
actionview (= 7.0.8.4)
activesupport (= 7.0.8.4)
actionpack (7.0.8.5)
actionview (= 7.0.8.5)
activesupport (= 7.0.8.5)
rack (~> 2.0, >= 2.2.4)
rack-test (>= 0.6.3)
rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.0, >= 1.2.0)
actiontext (7.0.8.4)
actionpack (= 7.0.8.4)
activerecord (= 7.0.8.4)
activestorage (= 7.0.8.4)
activesupport (= 7.0.8.4)
actiontext (7.0.8.5)
actionpack (= 7.0.8.5)
activerecord (= 7.0.8.5)
activestorage (= 7.0.8.5)
activesupport (= 7.0.8.5)
globalid (>= 0.6.0)
nokogiri (>= 1.8.5)
actionview (7.0.8.4)
activesupport (= 7.0.8.4)
actionview (7.0.8.5)
activesupport (= 7.0.8.5)
builder (~> 3.1)
erubi (~> 1.4)
rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.1, >= 1.2.0)
activejob (7.0.8.4)
activesupport (= 7.0.8.4)
activejob (7.0.8.5)
activesupport (= 7.0.8.5)
globalid (>= 0.3.6)
activemodel (7.0.8.4)
activesupport (= 7.0.8.4)
activerecord (7.0.8.4)
activemodel (= 7.0.8.4)
activesupport (= 7.0.8.4)
activestorage (7.0.8.4)
actionpack (= 7.0.8.4)
activejob (= 7.0.8.4)
activerecord (= 7.0.8.4)
activesupport (= 7.0.8.4)
activemodel (7.0.8.5)
activesupport (= 7.0.8.5)
activerecord (7.0.8.5)
activemodel (= 7.0.8.5)
activesupport (= 7.0.8.5)
activestorage (7.0.8.5)
actionpack (= 7.0.8.5)
activejob (= 7.0.8.5)
activerecord (= 7.0.8.5)
activesupport (= 7.0.8.5)
marcel (~> 1.0)
mini_mime (>= 1.1.0)
activesupport (7.0.8.4)
activesupport (7.0.8.5)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 1.6, < 2)
minitest (>= 5.1)
tzinfo (~> 2.0)
ast (2.4.2)
bootsnap (1.16.0)
msgpack (~> 1.2)
builder (3.2.4)
builder (3.3.0)
byebug (11.1.3)
coderay (1.1.3)
composite_primary_keys (14.0.6)
activerecord (~> 7.0.2)
concurrent-ruby (1.3.1)
concurrent-ruby (1.3.4)
crass (1.0.6)
database_cleaner (2.0.2)
database_cleaner-active_record (>= 2, < 3)
Expand All @@ -89,7 +89,7 @@ GEM
deprecate (0.0.0)
diff-lcs (1.5.0)
docile (1.4.0)
erubi (1.12.0)
erubi (1.13.0)
factory_bot (6.2.1)
activesupport (>= 5.0.0)
factory_bot_rails (6.2.0)
Expand Down Expand Up @@ -125,7 +125,7 @@ GEM
faraday (~> 1.0)
globalid (1.2.1)
activesupport (>= 6.1)
i18n (1.14.5)
i18n (1.14.6)
concurrent-ruby (~> 1.0)
io-console (0.6.0)
irb (1.6.4)
Expand All @@ -143,7 +143,7 @@ GEM
marcel (1.0.2)
method_source (1.0.0)
mini_mime (1.1.5)
minitest (5.23.1)
minitest (5.25.1)
msgpack (1.7.0)
multipart-post (2.3.0)
net-imap (0.4.10)
Expand All @@ -157,9 +157,9 @@ GEM
net-protocol
newrelic_rpm (9.5.0)
nio4r (2.7.3)
nokogiri (1.16.5-x86_64-darwin)
nokogiri (1.16.7-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.16.5-x86_64-linux)
nokogiri (1.16.7-x86_64-linux)
racc (~> 1.4)
panoptes-client (1.2.0)
deprecate
Expand All @@ -180,36 +180,36 @@ GEM
nio4r (~> 2.0)
pundit (2.3.0)
activesupport (>= 3.0.0)
racc (1.8.0)
rack (2.2.9)
racc (1.8.1)
rack (2.2.10)
rack-cors (2.0.2)
rack (>= 2.0.0)
rack-test (2.1.0)
rack (>= 1.3)
rails (7.0.8.4)
actioncable (= 7.0.8.4)
actionmailbox (= 7.0.8.4)
actionmailer (= 7.0.8.4)
actionpack (= 7.0.8.4)
actiontext (= 7.0.8.4)
actionview (= 7.0.8.4)
activejob (= 7.0.8.4)
activemodel (= 7.0.8.4)
activerecord (= 7.0.8.4)
activestorage (= 7.0.8.4)
activesupport (= 7.0.8.4)
rails (7.0.8.5)
actioncable (= 7.0.8.5)
actionmailbox (= 7.0.8.5)
actionmailer (= 7.0.8.5)
actionpack (= 7.0.8.5)
actiontext (= 7.0.8.5)
actionview (= 7.0.8.5)
activejob (= 7.0.8.5)
activemodel (= 7.0.8.5)
activerecord (= 7.0.8.5)
activestorage (= 7.0.8.5)
activesupport (= 7.0.8.5)
bundler (>= 1.15.0)
railties (= 7.0.8.4)
railties (= 7.0.8.5)
rails-dom-testing (2.2.0)
activesupport (>= 5.0.0)
minitest
nokogiri (>= 1.6)
rails-html-sanitizer (1.6.0)
loofah (~> 2.21)
nokogiri (~> 1.14)
railties (7.0.8.4)
actionpack (= 7.0.8.4)
activesupport (= 7.0.8.4)
railties (7.0.8.5)
actionpack (= 7.0.8.5)
activesupport (= 7.0.8.5)
method_source
rake (>= 12.2)
thor (~> 1.0)
Expand Down
3 changes: 2 additions & 1 deletion app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ def validate_date(date_param)
end

def validate_period
raise ValidationError, 'Invalid bucket option. Valid options for period is day, week, month, or year' unless SelectableWithTimeBucket::TIME_BUCKET_OPTIONS.keys.include? params[:period].downcase.to_sym
params[:period] = params[:period].downcase
raise ValidationError, 'Invalid bucket option. Valid options for period is day, week, month, or year' unless SelectableWithTimeBucket::TIME_BUCKET_OPTIONS.keys.include? params[:period].to_sym
end

def valid_date_range
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

module ClassificationCounts
class HourlyWorkflowClassificationCount < ApplicationRecord
self.table_name = 'hourly_classification_count_per_workflow'
attribute :classification_count, :integer

def readonly?
true
end
end
end
86 changes: 85 additions & 1 deletion app/queries/count_classifications.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,25 @@ def call(params={})
scoped = @counts
scoped = filter_by_workflow_id(scoped, params[:workflow_id])
scoped = filter_by_project_id(scoped, params[:project_id])
filter_by_date_range(scoped, params[:start_date], params[:end_date])
# Because of how the FE, calls out to this endpoint when querying for a project's workflow's classifications count
# And because of our use of Real Time Aggregates
# Querying the DailyClassificationCountByWorkflow becomes not as performant
# Because we are limited in resources, we do the following mitigaion for ONLY querying workflow classification counts:
# 1. Create a New HourlyClassificationCountByWorkflow which is RealTime and Create a Data Retention for this new aggregate (this should limit the amount of data the query planner has to sift through)
# 2. Turn off Real Time aggreation for the DailyClassificationCount
# 3. For workflow classification count queries that include the current date's counts, we query current date's counts via the HourlyClassificationCountByWorkflow and query the DailyClassificationCountByWorkflow for everything before the current date's

if params[:workflow_id].present?
if end_date_includes_today?(params[:end_date])
scoped_upto_yesterday = filter_by_date_range(scoped, params[:start_date], Date.yesterday.to_s)
scoped = include_today_to_scoped(scoped_upto_yesterday, params[:workflow_id], params[:period])
else
scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date])
end
else
scoped = filter_by_date_range(scoped, params[:start_date], params[:end_date])
end
scoped
end

private
Expand All @@ -22,6 +40,72 @@ def initial_scope(relation, period)
relation.select(select_and_time_bucket_by(period, 'classification')).group('period').order('period')
end

def include_today_to_scoped(scoped_upto_yesterday, workflow_id, period)
period = 'year' if period.nil?
todays_classifications = current_date_workflow_classifications(workflow_id)
return scoped_upto_yesterday if todays_classifications.blank?

if scoped_upto_yesterday.blank?
# append new entry where period is start of the period
todays_classifications[0].period = start_of_current_period(period).to_time.utc
return todays_classifications
end

most_recent_date_from_scoped = scoped_upto_yesterday[-1].period.to_date

# If period=week, month, or year, the current date could be part of that week, month or year;
# we check if the current date is part of the period
# if so, we add the count to the most recent period pulled from db
# if not, we append as a new entry for the current period
if today_part_of_recent_period?(most_recent_date_from_scoped, period)
add_todays_counts_to_recent_period_counts(scoped_upto_yesterday, todays_classifications)
else
todays_classifications[0].period = start_of_current_period(period).to_time.utc
append_today_to_scoped(scoped_upto_yesterday, todays_classifications)
end
end

def start_of_current_period(period)
today = Date.today
case period
when 'day'
today
when 'week'
# Returns Monday of current week
today.at_beginning_of_week
when 'month'
today.at_beginning_of_month
when 'year'
today.at_beginning_of_year
end
end

def today_part_of_recent_period?(most_recent_date, period)
most_recent_date == start_of_current_period(period)
end

def append_today_to_scoped(count_records_up_to_yesterday, todays_count)
count_records_up_to_yesterday + todays_count
end

def add_todays_counts_to_recent_period_counts(count_records_up_to_yesterday, todays_count)
current_period_counts = count_records_up_to_yesterday[-1].count + todays_count[0].count
count_records_up_to_yesterday[-1].count = current_period_counts
count_records_up_to_yesterday
end

def current_date_workflow_classifications(workflow_id)
current_day_str = Date.today.to_s
current_hourly_classifications = ClassificationCounts::HourlyWorkflowClassificationCount.select("time_bucket('1 day', hour) AS period, SUM(classification_count)::integer AS count").group('period').order('period').where("hour >= '#{current_day_str}'")
filter_by_workflow_id(current_hourly_classifications, workflow_id)
end

def end_date_includes_today?(end_date)
includes_today = true
includes_today = Date.parse(end_date) >= Date.today if end_date.present?
includes_today
end

def relation(params)
if params[:workflow_id]
ClassificationCounts::DailyWorkflowClassificationCount
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true.

class CreateHourlyWorkflowClassificationCount < ActiveRecord::Migration[7.0]
# we have to disable the migration transaction because creating materialized views within it is not allowed.

# Due to how the front end pulls project stats (and workflow stats) all in one go, we hit performance issues; especially if a project has multiple workflows.
# We have discovered that having a non-realtime/materialized only continous aggregate for our daily workflow count cagg is more performant than real time.
# We plan to do the following:
# - Update the daily_classification_count_per_workflow to be materialized only (i.e. non-realtime)
# - Create a subsequent realtime cagg that buckets hourly that we will create data retention policies for. The plan is for up to 72 hours worth of hourly workflow classification counts of data.
# - Update workflow query to first query the daily counts first and the query the hourly counts for just the specific date of now.
disable_ddl_transaction!
def up
execute <<~SQL
create materialized view hourly_classification_count_per_workflow
with (
timescaledb.continuous
) as
select
time_bucket('1 hour', event_time) as hour,
workflow_id,
count(*) as classification_count
from classification_events where event_time > now() - INTERVAL '5 days'
group by hour, workflow_id;
SQL
end

def down
execute <<~SQL
DROP materialized view hourly_classification_count_per_workflow;
SQL
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# frozen_string_literal: true

class AddRefreshPolicyForHourlyWorkflowCount < ActiveRecord::Migration[7.0]
disable_ddl_transaction!
def up
execute <<~SQL
SELECT add_continuous_aggregate_policy('hourly_classification_count_per_workflow',start_offset => INTERVAL '5 days', end_offset => INTERVAL '30 minutes', schedule_interval => INTERVAL '1 h');
SQL
end

def down
execute <<~SQL
SELECT remove_continuous_aggregate_policy('hourly_classification_count_per_workflow');
SQL
end
end
Loading

0 comments on commit 2796462

Please sign in to comment.