From c30f7147e02963a21ef1d9c8692188d0fe465fe3 Mon Sep 17 00:00:00 2001 From: Amy Boyer Date: Tue, 5 Mar 2019 10:36:37 -0600 Subject: [PATCH 1/7] add sequential stream class --- lib/designator/random_stream.ex | 4 ++-- lib/designator/sequential_stream.ex | 15 +++++++++++++++ lib/designator/subject_stream.ex | 2 +- test/designator/random_stream_test.exs | 4 ++-- test/designator/sequential_stream_test.exs | 13 +++++++++++++ 5 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 lib/designator/sequential_stream.ex create mode 100644 test/designator/sequential_stream_test.exs diff --git a/lib/designator/random_stream.ex b/lib/designator/random_stream.ex index 50c9ff5..15eb29f 100644 --- a/lib/designator/random_stream.ex +++ b/lib/designator/random_stream.ex @@ -1,8 +1,8 @@ defmodule Designator.RandomStream do alias Designator.Random - @spec shuffle(Enumerable.t) :: Enumerable.t - def shuffle(enum) do + @spec apply_to(Enumerable.t) :: Enumerable.t + def apply_to(enum) do Stream.unfold({enum, MapSet.new}, fn {enum, drawn} -> if size(enum) <= MapSet.size(drawn) do nil diff --git a/lib/designator/sequential_stream.ex b/lib/designator/sequential_stream.ex new file mode 100644 index 0000000..d1d5472 --- /dev/null +++ b/lib/designator/sequential_stream.ex @@ -0,0 +1,15 @@ +defmodule Designator.SequentialStream do + + @spec apply_to(Enumerable.t) :: Enumerable.t + def apply_to(enum) do + Stream.with_index(enum) |> Stream.map(fn {elem,index} -> {index, elem} end) + end + + defp size(enum = %Array{}) do + Array.size(enum) + end + + defp size(enum) do + Enum.count(enum) + end +end diff --git a/lib/designator/subject_stream.ex b/lib/designator/subject_stream.ex index 197bc60..56be606 100644 --- a/lib/designator/subject_stream.ex +++ b/lib/designator/subject_stream.ex @@ -9,7 +9,7 @@ defmodule Designator.SubjectStream do ### defp build_stream(subject_ids) do - Designator.RandomStream.shuffle(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) + Designator.RandomStream.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) end def get_amount(%Array{} = subject_ids) do diff --git a/test/designator/random_stream_test.exs b/test/designator/random_stream_test.exs index 692055f..950b7c1 100644 --- a/test/designator/random_stream_test.exs +++ b/test/designator/random_stream_test.exs @@ -4,10 +4,10 @@ defmodule Designator.RandomStreamTest do import Designator.RandomStream test "empty enum returns nothing" do - assert ([] |> shuffle |> Stream.take(5) |> Enum.sort) == [] + assert ([] |> apply_to |> Stream.take(5) |> Enum.sort) == [] end test "returns data" do - assert (1..5 |> shuffle |> Stream.take(5) |> Enum.sort) == [{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}] + assert (1..5 |> apply_to |> Stream.take(5) |> Enum.sort) == [{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}] end end diff --git a/test/designator/sequential_stream_test.exs b/test/designator/sequential_stream_test.exs new file mode 100644 index 0000000..a3d84c2 --- /dev/null +++ b/test/designator/sequential_stream_test.exs @@ -0,0 +1,13 @@ +defmodule Designator.SequentialStreamTest do + use ExUnit.Case + + import Designator.SequentialStream + + test "empty enum returns nothing" do + assert ([] |> apply_to |> Stream.take(5) |> Enum.sort) == [] + end + + test "returns data" do + assert (1..5 |> apply_to |> Stream.take(5)) |> Enum.into([]) == [{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}] + end +end From 166e3491bb900a2da48d24957336b8a4ea80fd45 Mon Sep 17 00:00:00 2001 From: Amy Boyer Date: Tue, 5 Mar 2019 10:46:51 -0600 Subject: [PATCH 2/7] reorganize iteration strategies --- .../{random_stream.ex => subject_set_iterators/randomly.ex} | 2 +- .../sequentially.ex} | 2 +- lib/designator/subject_stream.ex | 3 ++- .../randomly_test.exs} | 4 ++-- .../sequentially_test.exs} | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) rename lib/designator/{random_stream.ex => subject_set_iterators/randomly.ex} (90%) rename lib/designator/{sequential_stream.ex => subject_set_iterators/sequentially.ex} (82%) rename test/designator/{random_stream_test.exs => subject_set_iterators/randomly_test.exs} (71%) rename test/designator/{sequential_stream_test.exs => subject_set_iterators/sequentially_test.exs} (70%) diff --git a/lib/designator/random_stream.ex b/lib/designator/subject_set_iterators/randomly.ex similarity index 90% rename from lib/designator/random_stream.ex rename to lib/designator/subject_set_iterators/randomly.ex index 15eb29f..205a108 100644 --- a/lib/designator/random_stream.ex +++ b/lib/designator/subject_set_iterators/randomly.ex @@ -1,4 +1,4 @@ -defmodule Designator.RandomStream do +defmodule Designator.SubjectSetIterators.Randomly do alias Designator.Random @spec apply_to(Enumerable.t) :: Enumerable.t diff --git a/lib/designator/sequential_stream.ex b/lib/designator/subject_set_iterators/sequentially.ex similarity index 82% rename from lib/designator/sequential_stream.ex rename to lib/designator/subject_set_iterators/sequentially.ex index d1d5472..f5ed2db 100644 --- a/lib/designator/sequential_stream.ex +++ b/lib/designator/subject_set_iterators/sequentially.ex @@ -1,4 +1,4 @@ -defmodule Designator.SequentialStream do +defmodule Designator.SubjectSetIterators.Sequentially do @spec apply_to(Enumerable.t) :: Enumerable.t def apply_to(enum) do diff --git a/lib/designator/subject_stream.ex b/lib/designator/subject_stream.ex index 56be606..4690a9b 100644 --- a/lib/designator/subject_stream.ex +++ b/lib/designator/subject_stream.ex @@ -9,7 +9,8 @@ defmodule Designator.SubjectStream do ### defp build_stream(subject_ids) do - Designator.RandomStream.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) + Designator.SubjectSetIterators.Randomly.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) + # Designator.SequentialStream.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) end def get_amount(%Array{} = subject_ids) do diff --git a/test/designator/random_stream_test.exs b/test/designator/subject_set_iterators/randomly_test.exs similarity index 71% rename from test/designator/random_stream_test.exs rename to test/designator/subject_set_iterators/randomly_test.exs index 950b7c1..593061b 100644 --- a/test/designator/random_stream_test.exs +++ b/test/designator/subject_set_iterators/randomly_test.exs @@ -1,7 +1,7 @@ -defmodule Designator.RandomStreamTest do +defmodule Designator.SubjectSetIterators.RandomlyTest do use ExUnit.Case - import Designator.RandomStream + import Designator.SubjectSetIterators.Randomly test "empty enum returns nothing" do assert ([] |> apply_to |> Stream.take(5) |> Enum.sort) == [] diff --git a/test/designator/sequential_stream_test.exs b/test/designator/subject_set_iterators/sequentially_test.exs similarity index 70% rename from test/designator/sequential_stream_test.exs rename to test/designator/subject_set_iterators/sequentially_test.exs index a3d84c2..2c12574 100644 --- a/test/designator/sequential_stream_test.exs +++ b/test/designator/subject_set_iterators/sequentially_test.exs @@ -1,7 +1,7 @@ -defmodule Designator.SequentialStreamTest do +defmodule Designator.SubjectSetIterators.SequentiallyTest do use ExUnit.Case - import Designator.SequentialStream + import Designator.SubjectSetIterators.Sequentially test "empty enum returns nothing" do assert ([] |> apply_to |> Stream.take(5) |> Enum.sort) == [] From 7c0dd0bff9976c6c38c46254bffe7279c3c9983f Mon Sep 17 00:00:00 2001 From: Amy Boyer Date: Tue, 5 Mar 2019 11:31:02 -0600 Subject: [PATCH 3/7] use workflow.prioritized to specify iteration strategy --- lib/designator/selection.ex | 12 ++++++++++-- lib/designator/subject_stream.ex | 9 ++++----- lib/designator/workflow_cache.ex | 8 +++++--- test/designator/selection_test.exs | 9 +++++++++ web/models/workflow.ex | 1 + 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/lib/designator/selection.ex b/lib/designator/selection.ex index 18597bb..ed94a8b 100644 --- a/lib/designator/selection.ex +++ b/lib/designator/selection.ex @@ -74,12 +74,20 @@ defmodule Designator.Selection do end @spec convert_to_streams([SubjectSetCache.t], Workflow.t) :: [SubjectStream.t] - def convert_to_streams(subject_sets, _workflow) do + def convert_to_streams(subject_sets, workflow) do Enum.map(subject_sets, fn subject_set -> - Designator.SubjectStream.build(subject_set) + Designator.SubjectStream.build(subject_set, prepare_iterator(workflow)) end) end + defp prepare_iterator(workflow) do + if workflow.prioritized do + Designator.SubjectSetIterators.Sequentially + else + Designator.SubjectSetIterators.Randomly + end + end + defp deduplicate(stream) do Stream.uniq(stream) end diff --git a/lib/designator/subject_stream.ex b/lib/designator/subject_stream.ex index 4690a9b..aa38ea6 100644 --- a/lib/designator/subject_stream.ex +++ b/lib/designator/subject_stream.ex @@ -1,16 +1,15 @@ defmodule Designator.SubjectStream do defstruct [:subject_set_id, :stream, :amount, :chance] - def build(%{subject_set_id: subject_set_id, subject_ids: subject_ids}) do + def build(%{subject_set_id: subject_set_id, subject_ids: subject_ids}, subject_set_iterator) do amount = get_amount(subject_ids) - %Designator.SubjectStream{subject_set_id: subject_set_id, stream: build_stream(subject_ids), amount: amount, chance: amount} + %Designator.SubjectStream{subject_set_id: subject_set_id, stream: build_stream(subject_ids, subject_set_iterator), amount: amount, chance: amount} end ### - defp build_stream(subject_ids) do - Designator.SubjectSetIterators.Randomly.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) - # Designator.SequentialStream.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) + defp build_stream(subject_ids, subject_set_iterator) do + subject_set_iterator.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) end def get_amount(%Array{} = subject_ids) do diff --git a/lib/designator/workflow_cache.ex b/lib/designator/workflow_cache.ex index b7affcb..724b073 100644 --- a/lib/designator/workflow_cache.ex +++ b/lib/designator/workflow_cache.ex @@ -18,7 +18,7 @@ defmodule Designator.WorkflowCache do ### Public API - defstruct [:id, :subject_set_ids, :configuration] + defstruct [:id, :subject_set_ids, :configuration, :prioritized] def status do :workflow_cache @@ -60,13 +60,15 @@ defmodule Designator.WorkflowCache do %__MODULE__{ id: workflow_id, subject_set_ids: [], - configuration: %{} + configuration: %{}, + prioritized: false } workflow -> %__MODULE__{ id: workflow_id, subject_set_ids: Designator.Workflow.subject_set_ids(workflow_id), - configuration: workflow.configuration + configuration: workflow.configuration, + prioritized: workflow.prioritized } end end diff --git a/test/designator/selection_test.exs b/test/designator/selection_test.exs index 0b3cce3..22a21b2 100644 --- a/test/designator/selection_test.exs +++ b/test/designator/selection_test.exs @@ -45,6 +45,15 @@ defmodule Designator.SelectionTest do assert Selection.select(338, 1, [limit: 4]) == [4, 2, 3, 1] end + test "sequential selection for normal sets" do + Designator.Random.seed({123, 100020, 345345}) + Designator.WorkflowCache.set(338, %{configuration: %{}, prioritized: true, subject_set_ids: [1000]}) + Designator.UserCache.set({338, 1}, %{seen_ids: MapSet.new, recently_selected_ids: MapSet.new, configuration: %{}}) + SubjectSetCache.set({338, 1000}, %SubjectSetCache{workflow_id: 338, subject_set_id: 1000, subject_ids: Array.from_list([98, 99, 100])}) + + assert Selection.select(338, 1, [limit: 6]) == [98, 99, 100] + end + test "weighed selection for normal sets" do Designator.Random.seed({123, 100020, 345345}) Designator.WorkflowCache.set(338, %{configuration: %{"subject_set_weights" => %{"1000" => 1, "1001" => 99, "1002" => 9.9, "1003" => 0.1}}, diff --git a/web/models/workflow.ex b/web/models/workflow.ex index 8a541d9..056be54 100644 --- a/web/models/workflow.ex +++ b/web/models/workflow.ex @@ -7,6 +7,7 @@ defmodule Designator.Workflow do schema "workflows" do field :project_id, :integer field :configuration, :map + field :prioritized, :boolean timestamps inserted_at: :created_at end From c2998ab22a7aafc1503c7a2459ae25cea9dca3e3 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Thu, 20 Jun 2019 16:17:02 -0500 Subject: [PATCH 4/7] Sort by SMSes by priority when retrieving from db --- test/models/workflow_test.exs | 34 ++++++++++++++++++++++------------ web/models/workflow.ex | 7 ++++++- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/test/models/workflow_test.exs b/test/models/workflow_test.exs index 2a0133e..d2e7a4f 100644 --- a/test/models/workflow_test.exs +++ b/test/models/workflow_test.exs @@ -14,10 +14,10 @@ defmodule Designator.WorkflowTest do test "returns subject ids" do Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO workflows (id, created_at, updated_at) VALUES (1, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") - Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, random, created_at, updated_at) VALUES - (1, 1, 0.5, NOW(), NOW()), - (1, 2, 0.5, NOW(), NOW()), - (1, 3, 0.5, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") assert Designator.Workflow.subject_ids(1, 1) |> Enum.sort == [1,2,3] end @@ -25,10 +25,10 @@ defmodule Designator.WorkflowTest do test "does not return retired subjects" do Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO workflows (id, created_at, updated_at) VALUES (1, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") - Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, random, created_at, updated_at) VALUES - (1, 1, 0.5, NOW(), NOW()), - (1, 2, 0.5, NOW(), NOW()), - (1, 3, 0.5, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_workflow_counts (workflow_id, subject_id, retired_at) VALUES (1, 1, NOW())") @@ -40,15 +40,25 @@ defmodule Designator.WorkflowTest do (1, NOW(), NOW()), (2, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") - Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, random, created_at, updated_at) VALUES - (1, 1, 0.5, NOW(), NOW()), - (1, 2, 0.5, NOW(), NOW()), - (1, 3, 0.5, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_workflow_counts (workflow_id, subject_id, retired_at) VALUES (1, 1, NULL), (2, 1, NULL)") assert Designator.Workflow.subject_ids(1, 1) |> Enum.sort == [1,2,3] end + + test "maintains priority order" do + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO workflows (id, created_at, updated_at) VALUES (1, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") + assert Designator.Workflow.subject_ids(1, 1) == [3,2,1] + end end end diff --git a/web/models/workflow.ex b/web/models/workflow.ex index 056be54..8552ad3 100644 --- a/web/models/workflow.ex +++ b/web/models/workflow.ex @@ -4,6 +4,8 @@ defmodule Designator.Workflow do use Designator.Web, :model + require IEx + schema "workflows" do field :project_id, :integer field :configuration, :map @@ -28,8 +30,11 @@ defmodule Designator.Workflow do query = from sms in "set_member_subjects", left_join: swc in "subject_workflow_counts", on: (sms.subject_id == swc.subject_id and swc.workflow_id == ^workflow_id), where: sms.subject_set_id == ^subject_set_id and is_nil(swc.retired_at), - select: sms.subject_id + select: {sms.subject_id, sms.priority} + Designator.Repo.all(query) + |> List.keysort(1) + |> Enum.map(fn {k, v}->k end) end def changeset(struct, params \\ %{}) do From 3e711fa4b24e594fdd9510f2b22919b8dccdbcf6 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Thu, 20 Jun 2019 16:21:22 -0500 Subject: [PATCH 5/7] Delete unused subject model --- web/models/subject.ex | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 web/models/subject.ex diff --git a/web/models/subject.ex b/web/models/subject.ex deleted file mode 100644 index c214a2b..0000000 --- a/web/models/subject.ex +++ /dev/null @@ -1,20 +0,0 @@ -defmodule Designator.Subject do - import Ecto.Query, only: [from: 2] - - def unretired_ids(workflow_id) do - query = from s in "subjects", - join: sms in "set_member_subjects", on: s.id == sms.subject_id, - join: sw in "subject_sets_workflows", on: sw.subject_set_id == sms.subject_set_id, - left_join: swc in "subject_workflow_counts", on: s.id == swc.subject_id, - where: sw.workflow_id == ^workflow_id and not(is_nil(swc.retired_at)), - select: s.id - Designator.Repo.all(query) - end - - def seen_ids(workflow_id, user_id) do - query = from uss in "user_seen_subjects", - where: uss.workflow_id == ^workflow_id and uss.user_id == ^user_id, - select: uss.subject_ids - Designator.Repo.all(query) - end -end From 8ed2fb22b6cdb9d4d5e63820cc33f900e8d61732 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Thu, 20 Jun 2019 16:31:24 -0500 Subject: [PATCH 6/7] Remove IEx require --- web/models/workflow.ex | 2 -- 1 file changed, 2 deletions(-) diff --git a/web/models/workflow.ex b/web/models/workflow.ex index 8552ad3..c887a43 100644 --- a/web/models/workflow.ex +++ b/web/models/workflow.ex @@ -4,8 +4,6 @@ defmodule Designator.Workflow do use Designator.Web, :model - require IEx - schema "workflows" do field :project_id, :integer field :configuration, :map From 1a12958f9d044f6d80a67ee1d3e79b807713c96c Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Thu, 20 Jun 2019 17:14:54 -0500 Subject: [PATCH 7/7] Un-order ids in sequential test --- test/designator/selection_test.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/designator/selection_test.exs b/test/designator/selection_test.exs index 22a21b2..7f89f36 100644 --- a/test/designator/selection_test.exs +++ b/test/designator/selection_test.exs @@ -49,9 +49,9 @@ defmodule Designator.SelectionTest do Designator.Random.seed({123, 100020, 345345}) Designator.WorkflowCache.set(338, %{configuration: %{}, prioritized: true, subject_set_ids: [1000]}) Designator.UserCache.set({338, 1}, %{seen_ids: MapSet.new, recently_selected_ids: MapSet.new, configuration: %{}}) - SubjectSetCache.set({338, 1000}, %SubjectSetCache{workflow_id: 338, subject_set_id: 1000, subject_ids: Array.from_list([98, 99, 100])}) + SubjectSetCache.set({338, 1000}, %SubjectSetCache{workflow_id: 338, subject_set_id: 1000, subject_ids: Array.from_list([98, 99, 10])}) - assert Selection.select(338, 1, [limit: 6]) == [98, 99, 100] + assert Selection.select(338, 1, [limit: 6]) == [98, 99, 10] end test "weighed selection for normal sets" do