From e58cb69cc90f9949121feeb91707d5a1116b3f0e Mon Sep 17 00:00:00 2001 From: baxter2 Date: Sat, 11 May 2019 03:35:41 +1000 Subject: [PATCH] exclude can be a string, an array, regular expression or lambda. --- README.md | 51 ++++++++++++++++++++++++++++++++++++++++ lib/sentimental.rb | 28 +++++++++++++++++++--- spec/sentimental_spec.rb | 50 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fc7b1af..6c79a5d 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,57 @@ The dictionary must have this format: -2.0 no 0.0 meh +## Excluding words from the analysis + +You can exclude anything you want from the string you want to analyse by passing in an `exclude` keyword. The `exclude` keyword accepts a variety of filters. + +1. A string containing a space-delimited list of candidates. +2. An array of string candidates. For example: `['do', 'love']`. +3. A regular expression. +4. A lambda. + +#### Using a string +```ruby +Sentimental.new( + word_scores: { 'love' => 0.925, 'do' => -0.375 }, + exclude: 'do' +).score('Do you love ruby?') +#=> 0.925 + +Sentimental.new( + word_scores: { 'love' => 0.925, 'do' => -0.375 }, + exclude: 'do love' +).score('Do you love ruby?') +#=> 0.0 +``` + +#### Using an array +```ruby +Sentimental.new( + word_scores: { 'love' => 0.925, 'do' => -0.375 }, + exclude: ['do', 'love'] +).score('Do you love ruby?') +#=> 0.0 +``` + +#### Using a regular expression +```ruby +Sentimental.new( + word_scores: { 'love' => 0.925, 'do' => -0.375 }, + exclude: /love/i +).score('Do you love ruby?') +#=> -0.375 +``` + +#### Using a lambda +```ruby +Sentimental.new( + word_scores: { 'love' => 0.925, 'do' => -0.375 }, + exclude: ->(w) { w == 'do' } +).score('Do you love ruby?') +#=> 0.925 +``` + ## Installation gem install sentimental diff --git a/lib/sentimental.rb b/lib/sentimental.rb index 577bdfa..c920c25 100644 --- a/lib/sentimental.rb +++ b/lib/sentimental.rb @@ -3,9 +3,9 @@ class Sentimental 
include FileReader - attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams, :influencers + attr_accessor :threshold, :word_scores, :neutral_regexps, :ngrams, :influencers, :exclude - def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1, influencers: nil) + def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1, influencers: nil, exclude: nil) @ngrams = ngrams.to_i.abs if ngrams.to_i >= 1 @word_scores = word_scores || {} @influencers = influencers || {} @@ -13,6 +13,7 @@ def initialize(threshold: 0, word_scores: nil, neutral_regexps: [], ngrams: 1, i @influencers.default = 0.0 @threshold = threshold @neutral_regexps = neutral_regexps + @exclude = filter_proc(exclude) end def score(string) @@ -20,7 +21,8 @@ def score(string) initial_scoring = {score: 0, current_influencer: 1.0} - extract_words_with_n_grams(string).inject(initial_scoring) do |current_scoring, word| + words_excluded = extract_words_with_n_grams(string).reject { |word| exclude.call(word) } + words_excluded.inject(initial_scoring) do |current_scoring, word| process_word(current_scoring, word) end[:score] end @@ -99,4 +101,24 @@ def extract_words_with_n_grams(string) def influence_score @total_score < 0.0 ? -@influence : +@influence end + + def filter_proc(filter) + if filter.respond_to?(:to_a) + filter_procs = Array(filter).map(&method(:filter_proc)) + ->(word) { + filter_procs.any? 
{ |p| p.call(word) } + } + elsif filter.respond_to?(:to_str) + exclusion_list = filter.split.collect(&:downcase) + ->(word) { + exclusion_list.include?(word) + } + elsif regexp_filter = Regexp.try_convert(filter) + Proc.new { |word| word =~ regexp_filter } + elsif filter.respond_to?(:to_proc) + filter.to_proc + else + raise ArgumentError, "Filter must String, Array, Lambda, or a Regexp" + end + end end diff --git a/spec/sentimental_spec.rb b/spec/sentimental_spec.rb index dd74b35..4236251 100644 --- a/spec/sentimental_spec.rb +++ b/spec/sentimental_spec.rb @@ -212,4 +212,54 @@ expect(analyzer.score("I really really love ruby")).to be > analyzer.score("I really love ruby") end end + + describe 'exclude' do + let(:text) { 'Do you love ruby?' } + subject do + Sentimental.new( + word_scores: { 'love' => 0.925, 'do' => -0.375 }, + exclude: exclude + ) + end + + context 'string filter excludes one word' do + let(:exclude) { 'do' } + + it 'updates scores regarding to exclude' do + expect(subject.score(text)).to eq(0.925) + end + end + + context 'string filter excludes multiple words' do + let(:exclude) { 'do love' } + + it 'updates scores regarding to exclude' do + expect(subject.score(text)).to eq(0) + end + end + + context 'array filter excludes words' do + let(:exclude) { ['do', 'love'] } + + it 'updates scores regarding to exclude' do + expect(subject.score(text)).to eq(0) + end + end + + context 'regex filter excludes' do + let(:exclude) { /love/i } + + it 'updates scores regarding to exclude' do + expect(subject.score(text)).to eq(-0.375) + end + end + + context 'lambda filter excludes words' do + let(:exclude) { ->(w) { w == 'do' } } + + it 'updates scores regarding to exclude' do + expect(subject.score(text)).to eq(0.925) + end + end + end end