From 2ba27538570fe7ce3738a6b124e561e7bc556662 Mon Sep 17 00:00:00 2001
From: Michal Siedlaczek <michal@siedlaczek.me>
Date: Thu, 4 Jan 2024 17:15:24 -0500
Subject: [PATCH] Add efficiency disclaimer to `keep_duplicates`

---
 include/pisa/query.hpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/pisa/query.hpp b/include/pisa/query.hpp
index 4685eaff..34bf9da7 100644
--- a/include/pisa/query.hpp
+++ b/include/pisa/query.hpp
@@ -60,10 +60,16 @@ namespace query {
      * occurrence.
      *
      * This policy can be modified with the following options:
-     *  - `keep_duplicates`: duplicates will be preserved, each with weight 1.0;
+     *  - `keep_duplicates`: duplicates will be preserved, each with weight 1.0
+     *    (inefficient -- see below);
      *  - `unweighted`: forces each weight to be 1.0 even if duplicates are removed;
      *  - `sort`: sorts terms by ID.
      *
+     * !! Note that `keep_duplicates` is very inefficient if used for retrieval because some posting
+     * lists will have to be traversed multiple times if duplicate terms exist. Do not use it unless
+     * you know exactly what you are doing (e.g. if you use Query outside of the standard query
+     * processing and you rely on duplicates).
+     *
      * Policies can be combined similar to bitsets. For example, `unweighted | sort` will both
      * force unit weights and sort the terms.
      */
@@ -77,7 +83,8 @@ namespace query {
     /** Merges two policies; the resulting policy will policies from both arguments. */
     [[nodiscard]] auto operator|(TermPolicy lhs, TermPolicy rhs) noexcept -> TermPolicy;
 
-    /** Keep duplicates. */
+    /** Duplicates are removed and weights are equal to the number of occurrences of each term in
+     * the query. Terms are not sorted. */
     static constexpr TermPolicy default_policy = {0b000};
 
     /** Keep duplicates. */