From 85baf3fdb79475e13918e51ac707629655c8ba55 Mon Sep 17 00:00:00 2001 From: Taico Aerts Date: Sun, 14 Aug 2022 22:17:04 +0200 Subject: [PATCH 1/8] Add elasticsearch dependency --- Gemfile | 4 ++++ Gemfile.lock | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/Gemfile b/Gemfile index 228b43a1e..f561b496f 100644 --- a/Gemfile +++ b/Gemfile @@ -72,6 +72,10 @@ gem 'stripe', '~> 5.28' # EeeMAILS! gem 'premailer-rails', '~> 1.11' +# Better searching +gem 'elasticsearch-model', '~> 7.2' +gem 'elasticsearch-rails', '~> 7.2' + group :test do gem 'minitest', '~> 5.10.3' gem 'minitest-ci', '~> 3.4.0' diff --git a/Gemfile.lock b/Gemfile.lock index 80040aa63..fd6694813 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -105,8 +105,44 @@ GEM diffy (3.4.0) docile (1.4.0) e2mmap (0.1.0) + elasticsearch (7.17.1) + elasticsearch-api (= 7.17.1) + elasticsearch-transport (= 7.17.1) + elasticsearch-api (7.17.1) + multi_json + elasticsearch-model (7.2.1) + activesupport (> 3) + elasticsearch (~> 7) + hashie + elasticsearch-rails (7.2.1) + elasticsearch-transport (7.17.1) + faraday (~> 1) + multi_json erubi (1.10.0) execjs (2.8.1) + faraday (1.10.1) + faraday-em_http (~> 1.0) + faraday-em_synchrony (~> 1.0) + faraday-excon (~> 1.1) + faraday-httpclient (~> 1.0) + faraday-multipart (~> 1.0) + faraday-net_http (~> 1.0) + faraday-net_http_persistent (~> 1.0) + faraday-patron (~> 1.0) + faraday-rack (~> 1.0) + faraday-retry (~> 1.0) + ruby2_keywords (>= 0.0.4) + faraday-em_http (1.0.0) + faraday-em_synchrony (1.0.0) + faraday-excon (1.1.0) + faraday-httpclient (1.0.1) + faraday-multipart (1.0.4) + multipart-post (~> 2) + faraday-net_http (1.0.1) + faraday-net_http_persistent (1.2.0) + faraday-patron (1.0.0) + faraday-rack (1.0.0) + faraday-retry (1.0.3) fastimage (2.2.4) ffi (1.15.5) flamegraph (0.9.5) @@ -153,6 +189,8 @@ GEM minitest (5.10.3) minitest-ci (3.4.0) minitest (>= 5.0.6) + multi_json (1.15.0) + multipart-post (2.2.3) mysql2 (0.5.3) nio4r 
(2.5.8) nokogiri (1.13.3-x86_64-linux) @@ -245,6 +283,7 @@ GEM ruby-progressbar (1.11.0) ruby-vips (2.1.4) ffi (~> 1.12) + ruby2_keywords (0.0.5) sass (3.7.4) sass-listen (~> 4.0.0) sass-listen (4.0.0) @@ -322,6 +361,8 @@ DEPENDENCIES devise (~> 4.7) diffy (~> 3.3) e2mmap (~> 0.1) + elasticsearch-model (~> 7.2) + elasticsearch-rails (~> 7.2) fastimage (~> 2.1) flamegraph (~> 0.9) groupdate (~> 4.3) From 8a1c050798b295f643982c79f823b09c35dce42a Mon Sep 17 00:00:00 2001 From: Taico Aerts Date: Sun, 14 Aug 2022 22:18:23 +0200 Subject: [PATCH 2/8] Add elasticsearch settings (global site) --- db/seeds/site_settings.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/db/seeds/site_settings.yml b/db/seeds/site_settings.yml index e2336e33a..2d1b9da5d 100644 --- a/db/seeds/site_settings.yml +++ b/db/seeds/site_settings.yml @@ -416,3 +416,29 @@ category: Display description: > Automatically expand vote summary entries for the last X days, X being the value of this setting. Set to 0 to expand all entries. + +- name: ElasticsearchEnabled + value: false + value_type: boolean + community_id: ~ + category: Search + description: > + Enable better searching with Elasticsearch. Requires Elasticsearch to be installed and configured. + WARNING: When enabling this, you must also synchronize elasticsearch with the database. + If elasticsearch is out of sync with the database, server errors will occur. + +- name: ElasticsearchTitleWeight + value: 5 + value_type: integer + community_id: ~ + category: Search + description: > + The relative weight that matches in the title get (Elasticsearch only). + +- name: ElasticsearchBodyWeight + value: 1 + value_type: integer + community_id: ~ + category: Search + description: > + The relative weight that matches in the body get (Elasticsearch only). 
# Adds Elasticsearch support to the including model.
#
# Elasticsearch can be switched on and off at runtime through the
# `ElasticsearchEnabled` site setting, without restarting the server. While the
# setting is off, both `__elasticsearch__` proxies (class level and instance
# level) are replaced by a no-op mock, so anything routed through them is
# silently dropped instead of trying to reach Elasticsearch.
#
# Changes made while the setting is off are therefore never sent to the index.
module Elasticsearchable
  extend ActiveSupport::Concern

  # Stand-in for the Elasticsearch proxies while Elasticsearch is disabled.
  # It accepts every message and answers nil.
  class ElasticsearchMock
    # @return [ElasticsearchMock] the mock itself, so calls chained on the
    #   client are swallowed as well
    def client
      self
    end

    # Swallows any call. Arguments and blocks are accepted and ignored; the
    # previous one-parameter signature raised ArgumentError as soon as a
    # swallowed call passed an argument.
    #
    # @return [nil]
    def method_missing(_name, *_args, &_block); end

    # Keeps `respond_to?` consistent with `method_missing`: the mock answers
    # every message.
    #
    # @return [true]
    def respond_to_missing?(_name, _include_private = false)
      true
    end
  end

  included do
    include Elasticsearch::Model
    include Elasticsearch::Model::Callbacks

    # Use the Rails env in the index name to prevent issues of test indices overriding development/production indices.
    # NOTE(review): this runs before `__elasticsearch__` is overridden below, so presumably it always reaches the
    # real proxy even when Elasticsearch is disabled - confirm before reordering.
    index_name "#{Rails.env}_#{model_name.collection.gsub(%r{/}, '-')}"

    # Class-level proxy. Returns the real (memoized) Elasticsearch proxy while
    # the `ElasticsearchEnabled` setting is on, and a fresh no-op mock otherwise.
    # A given block is evaluated against the real proxy only.
    def self.__elasticsearch__(&block)
      if SiteSetting['ElasticsearchEnabled']
        @__elasticsearch__ ||= Elasticsearch::Model::Proxy::ClassMethodsProxy.new(self)
        @__elasticsearch__.instance_eval(&block) if block_given?
        @__elasticsearch__
      else
        ElasticsearchMock.new
      end
    end

    # Instance-level proxy. Same switching rule as the class-level proxy: the
    # real (memoized) proxy while enabled, a no-op mock while disabled.
    def __elasticsearch__(&block)
      if SiteSetting['ElasticsearchEnabled']
        @__elasticsearch__ ||= Elasticsearch::Model::Proxy::InstanceMethodsProxy.new(self)
        @__elasticsearch__.instance_eval(&block) if block_given?
        @__elasticsearch__
      else
        ElasticsearchMock.new
      end
    end
  end
end
+# +# Elasticsearch::Model.client = Elasticsearch::Client.new hosts: [ +# { +# host: 'localhost', +# port: '9200', +# user: 'elastic', +# password: 'password', +# scheme: 'https' +# } +# ] From b8add86d413ca7e4f56985d88e670c92bc55e41b Mon Sep 17 00:00:00 2001 From: Taico Aerts Date: Sun, 14 Aug 2022 22:28:25 +0200 Subject: [PATCH 5/8] Add support for ES relevance sorting in sort orders --- app/controllers/search_controller.rb | 5 +++-- app/models/application_record.rb | 14 +++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/app/controllers/search_controller.rb b/app/controllers/search_controller.rb index f2e6673ef..70bddcd1f 100644 --- a/app/controllers/search_controller.rb +++ b/app/controllers/search_controller.rb @@ -8,8 +8,9 @@ def search posts = posts.paginate(page: params[:page], per_page: 25) if search_data[:search].present? - posts.search(search_data[:search]).user_sort({ term: params[:sort], default: :search_score }, - relevance: :search_score, score: :score, age: :created_at) + search_score_key = SiteSetting['ElasticsearchEnabled'] ? :es_search_score : :search_score + posts.search(search_data[:search]).user_sort({ term: params[:sort], default: search_score_key }, + relevance: search_score_key, score: :score, age: :created_at) else posts.user_sort({ term: params[:sort], default: :score }, score: :score, age: :created_at) diff --git a/app/models/application_record.rb b/app/models/application_record.rb index c5299443d..8b5f0b53e 100644 --- a/app/models/application_record.rb +++ b/app/models/application_record.rb @@ -97,12 +97,16 @@ def user_sort(term_opts, **field_mappings) requested = term_opts[:term] direction = term_opts[:direction] || :desc if requested.nil? || field_mappings.exclude?(requested.to_sym) - $active_search_param = default - default.is_a?(Symbol) ? 
# Full-text search for posts matching +term+.
#
# While the `ElasticsearchEnabled` site setting is on, the term is turned into an
# Elasticsearch query, and the records Elasticsearch reports are merged with `self`
# (presumably the relation the scope is chained onto - confirm). While it is off,
# the search falls back to `match_search` on `posts.body_markdown`.
scope :search, lambda { |term|
  next match_search(term, posts: :body_markdown) unless SiteSetting['ElasticsearchEnabled']

  es_query = create_elasticsearch_query(term)
  __elasticsearch__.search(es_query).records.merge(self)
}
# Builds the request body for an Elasticsearch full-text search over posts.
#
# The term is matched against `title` and `body_markdown` with a `multi_match`
# query. Each field is boosted by its site setting (`ElasticsearchTitleWeight`,
# `ElasticsearchBodyWeight`), falling back to 2 and 1 when a setting is unset.
#
# Elasticsearch returns only 10 hits when no `size` is given. Without an explicit
# `size`, a search could never yield more than 10 posts, regardless of how the
# resulting relation is paginated afterwards, so the body always carries one.
#
# @param term [String] the user's search text
# @param size [Integer] maximum number of hits to request; the default of 10,000
#   matches Elasticsearch's default `index.max_result_window`
#   (assumes that index setting has not been lowered - confirm for the deployment)
# @return [Hash] request body for the Elasticsearch search call
def self.create_elasticsearch_query(term, size: 10_000)
  title_weight = SiteSetting['ElasticsearchTitleWeight'] || 2
  body_weight = SiteSetting['ElasticsearchBodyWeight'] || 1

  {
    size: size,
    query: {
      multi_match: {
        query: term,
        fields: ["title^#{title_weight}", "body_markdown^#{body_weight}"]
      }
    }
  }
end
- name: ElasticsearchTitleWeight - value: 5 + value: 2 value_type: integer community_id: ~ category: Search From 47facd5b52598360f5ff5d523130d97c4211edcf Mon Sep 17 00:00:00 2001 From: Taico Aerts Date: Sun, 4 Sep 2022 22:32:29 +0200 Subject: [PATCH 8/8] Paginate after search to fix elasticsearch issue --- app/controllers/search_controller.rb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/app/controllers/search_controller.rb b/app/controllers/search_controller.rb index 70bddcd1f..cfd2ab962 100644 --- a/app/controllers/search_controller.rb +++ b/app/controllers/search_controller.rb @@ -3,18 +3,20 @@ def search @posts = if params[:search].present? search_data = helpers.parse_search(params[:search]) posts = (current_user&.is_moderator || current_user&.is_admin ? Post : Post.undeleted) - .qa_only.list_includes + .qa_only posts = helpers.qualifiers_to_sql(search_data[:qualifiers], posts) - posts = posts.paginate(page: params[:page], per_page: 25) if search_data[:search].present? search_score_key = SiteSetting['ElasticsearchEnabled'] ? :es_search_score : :search_score - posts.search(search_data[:search]).user_sort({ term: params[:sort], default: search_score_key }, - relevance: search_score_key, score: :score, age: :created_at) + posts = posts.search(search_data[:search]) + .user_sort({ term: params[:sort], default: search_score_key }, + relevance: search_score_key, score: :score, age: :created_at) else - posts.user_sort({ term: params[:sort], default: :score }, - score: :score, age: :created_at) + posts = posts.user_sort({ term: params[:sort], default: :score }, + score: :score, age: :created_at) end + + posts.list_includes.paginate(page: params[:page], per_page: 25) end @count = begin @posts&.count