diff --git a/lib/designator/selection.ex b/lib/designator/selection.ex index 18597bb..ed94a8b 100644 --- a/lib/designator/selection.ex +++ b/lib/designator/selection.ex @@ -74,12 +74,20 @@ defmodule Designator.Selection do end @spec convert_to_streams([SubjectSetCache.t], Workflow.t) :: [SubjectStream.t] - def convert_to_streams(subject_sets, _workflow) do + def convert_to_streams(subject_sets, workflow) do Enum.map(subject_sets, fn subject_set -> - Designator.SubjectStream.build(subject_set) + Designator.SubjectStream.build(subject_set, prepare_iterator(workflow)) end) end + defp prepare_iterator(workflow) do + if workflow.prioritized do + Designator.SubjectSetIterators.Sequentially + else + Designator.SubjectSetIterators.Randomly + end + end + defp deduplicate(stream) do Stream.uniq(stream) end diff --git a/lib/designator/random_stream.ex b/lib/designator/subject_set_iterators/randomly.ex similarity index 76% rename from lib/designator/random_stream.ex rename to lib/designator/subject_set_iterators/randomly.ex index 50c9ff5..205a108 100644 --- a/lib/designator/random_stream.ex +++ b/lib/designator/subject_set_iterators/randomly.ex @@ -1,8 +1,8 @@ -defmodule Designator.RandomStream do +defmodule Designator.SubjectSetIterators.Randomly do alias Designator.Random - @spec shuffle(Enumerable.t) :: Enumerable.t - def shuffle(enum) do + @spec apply_to(Enumerable.t) :: Enumerable.t + def apply_to(enum) do Stream.unfold({enum, MapSet.new}, fn {enum, drawn} -> if size(enum) <= MapSet.size(drawn) do nil diff --git a/lib/designator/subject_set_iterators/sequentially.ex b/lib/designator/subject_set_iterators/sequentially.ex new file mode 100644 index 0000000..f5ed2db --- /dev/null +++ b/lib/designator/subject_set_iterators/sequentially.ex @@ -0,0 +1,15 @@ +defmodule Designator.SubjectSetIterators.Sequentially do + + @spec apply_to(Enumerable.t) :: Enumerable.t + def apply_to(enum) do + Stream.with_index(enum) |> Stream.map(fn {elem,index} -> {index, elem} end) + end + + defp size(enum = %Array{}) do + Array.size(enum) + end + + defp size(enum) do + Enum.count(enum) + end +end diff --git a/lib/designator/subject_stream.ex b/lib/designator/subject_stream.ex index 197bc60..aa38ea6 100644 --- a/lib/designator/subject_stream.ex +++ b/lib/designator/subject_stream.ex @@ -1,15 +1,15 @@ defmodule Designator.SubjectStream do defstruct [:subject_set_id, :stream, :amount, :chance] - def build(%{subject_set_id: subject_set_id, subject_ids: subject_ids}) do + def build(%{subject_set_id: subject_set_id, subject_ids: subject_ids}, subject_set_iterator) do amount = get_amount(subject_ids) - %Designator.SubjectStream{subject_set_id: subject_set_id, stream: build_stream(subject_ids), amount: amount, chance: amount} + %Designator.SubjectStream{subject_set_id: subject_set_id, stream: build_stream(subject_ids, subject_set_iterator), amount: amount, chance: amount} end ### - defp build_stream(subject_ids) do - Designator.RandomStream.shuffle(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) + defp build_stream(subject_ids, subject_set_iterator) do + subject_set_iterator.apply_to(subject_ids) |> Stream.map(fn {_idx, elm} -> elm end) end def get_amount(%Array{} = subject_ids) do diff --git a/lib/designator/workflow_cache.ex b/lib/designator/workflow_cache.ex index b7affcb..724b073 100644 --- a/lib/designator/workflow_cache.ex +++ b/lib/designator/workflow_cache.ex @@ -18,7 +18,7 @@ defmodule Designator.WorkflowCache do ### Public API - defstruct [:id, :subject_set_ids, :configuration] + defstruct [:id, :subject_set_ids, :configuration, :prioritized] def status do :workflow_cache @@ -60,13 +60,15 @@ defmodule Designator.WorkflowCache do %__MODULE__{ id: workflow_id, subject_set_ids: [], - configuration: %{} + configuration: %{}, + prioritized: false } workflow -> %__MODULE__{ id: workflow_id, subject_set_ids: Designator.Workflow.subject_set_ids(workflow_id), - configuration: workflow.configuration + configuration: workflow.configuration, + prioritized: workflow.prioritized } end end diff --git a/test/designator/random_stream_test.exs b/test/designator/random_stream_test.exs deleted file mode 100644 index 692055f..0000000 --- a/test/designator/random_stream_test.exs +++ /dev/null @@ -1,13 +0,0 @@ -defmodule Designator.RandomStreamTest do - use ExUnit.Case - - import Designator.RandomStream - - test "empty enum returns nothing" do - assert ([] |> shuffle |> Stream.take(5) |> Enum.sort) == [] - end - - test "returns data" do - assert (1..5 |> shuffle |> Stream.take(5) |> Enum.sort) == [{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}] - end -end diff --git a/test/designator/selection_test.exs b/test/designator/selection_test.exs index 0b3cce3..7f89f36 100644 --- a/test/designator/selection_test.exs +++ b/test/designator/selection_test.exs @@ -45,6 +45,15 @@ defmodule Designator.SelectionTest do assert Selection.select(338, 1, [limit: 4]) == [4, 2, 3, 1] end + test "sequential selection for normal sets" do + Designator.Random.seed({123, 100020, 345345}) + Designator.WorkflowCache.set(338, %{configuration: %{}, prioritized: true, subject_set_ids: [1000]}) + Designator.UserCache.set({338, 1}, %{seen_ids: MapSet.new, recently_selected_ids: MapSet.new, configuration: %{}}) + SubjectSetCache.set({338, 1000}, %SubjectSetCache{workflow_id: 338, subject_set_id: 1000, subject_ids: Array.from_list([98, 99, 10])}) + + assert Selection.select(338, 1, [limit: 6]) == [98, 99, 10] + end + test "weighed selection for normal sets" do Designator.Random.seed({123, 100020, 345345}) Designator.WorkflowCache.set(338, %{configuration: %{"subject_set_weights" => %{"1000" => 1, "1001" => 99, "1002" => 9.9, "1003" => 0.1}}, diff --git a/test/designator/subject_set_iterators/randomly_test.exs b/test/designator/subject_set_iterators/randomly_test.exs new file mode 100644 index 0000000..593061b --- /dev/null +++ b/test/designator/subject_set_iterators/randomly_test.exs @@ -0,0 +1,13 @@ +defmodule Designator.SubjectSetIterators.RandomlyTest do + use ExUnit.Case + + import Designator.SubjectSetIterators.Randomly + + test "empty enum returns nothing" do + assert ([] |> apply_to |> Stream.take(5) |> Enum.sort) == [] + end + + test "returns data" do + assert (1..5 |> apply_to |> Stream.take(5) |> Enum.sort) == [{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}] + end +end diff --git a/test/designator/subject_set_iterators/sequentially_test.exs b/test/designator/subject_set_iterators/sequentially_test.exs new file mode 100644 index 0000000..2c12574 --- /dev/null +++ b/test/designator/subject_set_iterators/sequentially_test.exs @@ -0,0 +1,13 @@ +defmodule Designator.SubjectSetIterators.SequentiallyTest do + use ExUnit.Case + + import Designator.SubjectSetIterators.Sequentially + + test "empty enum returns nothing" do + assert ([] |> apply_to |> Stream.take(5) |> Enum.sort) == [] + end + + test "returns data" do + assert (1..5 |> apply_to |> Stream.take(5)) |> Enum.into([]) == [{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}] + end +end diff --git a/test/models/workflow_test.exs b/test/models/workflow_test.exs index 2a0133e..d2e7a4f 100644 --- a/test/models/workflow_test.exs +++ b/test/models/workflow_test.exs @@ -14,10 +14,10 @@ defmodule Designator.WorkflowTest do test "returns subject ids" do Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO workflows (id, created_at, updated_at) VALUES (1, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") - Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, random, created_at, updated_at) VALUES - (1, 1, 0.5, NOW(), NOW()), - (1, 2, 0.5, NOW(), NOW()), - (1, 3, 0.5, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") assert Designator.Workflow.subject_ids(1, 1) |> Enum.sort == [1,2,3] end @@ -25,10 +25,10 @@ defmodule Designator.WorkflowTest do test "does not return retired subjects" do Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO workflows (id, created_at, updated_at) VALUES (1, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") - Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, random, created_at, updated_at) VALUES - (1, 1, 0.5, NOW(), NOW()), - (1, 2, 0.5, NOW(), NOW()), - (1, 3, 0.5, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_workflow_counts (workflow_id, subject_id, retired_at) VALUES (1, 1, NOW())") @@ -40,15 +40,25 @@ defmodule Designator.WorkflowTest do (1, NOW(), NOW()), (2, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") - Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, random, created_at, updated_at) VALUES - (1, 1, 0.5, NOW(), NOW()), - (1, 2, 0.5, NOW(), NOW()), - (1, 3, 0.5, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_workflow_counts (workflow_id, subject_id, retired_at) VALUES (1, 1, NULL), (2, 1, NULL)") assert Designator.Workflow.subject_ids(1, 1) |> Enum.sort == [1,2,3] end + + test "maintains priority order" do + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO workflows (id, created_at, updated_at) VALUES (1, NOW(), NOW())") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO subject_sets_workflows (workflow_id, subject_set_id) VALUES (1, 1)") + Ecto.Adapters.SQL.query!(Designator.Repo, "INSERT INTO set_member_subjects (subject_set_id, subject_id, priority, random, created_at, updated_at) VALUES + (1, 1, 3, 0.5, NOW(), NOW()), + (1, 2, 2, 0.5, NOW(), NOW()), + (1, 3, 1, 0.5, NOW(), NOW())") + assert Designator.Workflow.subject_ids(1, 1) == [3,2,1] + end end end diff --git a/web/models/subject.ex b/web/models/subject.ex deleted file mode 100644 index c214a2b..0000000 --- a/web/models/subject.ex +++ /dev/null @@ -1,20 +0,0 @@ -defmodule Designator.Subject do - import Ecto.Query, only: [from: 2] - - def unretired_ids(workflow_id) do - query = from s in "subjects", - join: sms in "set_member_subjects", on: s.id == sms.subject_id, - join: sw in "subject_sets_workflows", on: sw.subject_set_id == sms.subject_set_id, - left_join: swc in "subject_workflow_counts", on: s.id == swc.subject_id, - where: sw.workflow_id == ^workflow_id and not(is_nil(swc.retired_at)), - select: s.id - Designator.Repo.all(query) - end - - def seen_ids(workflow_id, user_id) do - query = from uss in "user_seen_subjects", - where: uss.workflow_id == ^workflow_id and uss.user_id == ^user_id, - select: uss.subject_ids - Designator.Repo.all(query) - end -end diff --git a/web/models/workflow.ex b/web/models/workflow.ex index 8a541d9..c887a43 100644 --- a/web/models/workflow.ex +++ b/web/models/workflow.ex @@ -7,6 +7,7 @@ defmodule Designator.Workflow do schema "workflows" do field :project_id, :integer field :configuration, :map + field :prioritized, :boolean timestamps inserted_at: :created_at end @@ -27,8 +28,11 @@ defmodule Designator.Workflow do query = from sms in "set_member_subjects", left_join: swc in "subject_workflow_counts", on: (sms.subject_id == swc.subject_id and swc.workflow_id == ^workflow_id), where: sms.subject_set_id == ^subject_set_id and is_nil(swc.retired_at), - select: sms.subject_id + select: {sms.subject_id, sms.priority} + Designator.Repo.all(query) + |> List.keysort(1) + |> Enum.map(fn {k, v}->k end) end def changeset(struct, params \\ %{}) do