From 2ace3be45673c97caab8ef7f628d7dc8fc621fac Mon Sep 17 00:00:00 2001 From: Jeff Evans Date: Tue, 25 May 2021 13:47:54 -0500 Subject: [PATCH] Improve performance of removing elements from sequences Merging `InsertBeforeIndex` protocol into new `IndexedOps` protocol for optimized indexed based operations (which now includes inserting before and removing at) Updating `nthpath` and `keypath` to use new helper fns Adding tests and benchmarks --- scripts/benchmarks.clj | 55 ++++++++++++++------ src/clj/com/rpl/specter/navs.cljc | 80 ++++++++++++++++++++++------- test/com/rpl/specter/core_test.cljc | 19 ++++++- 3 files changed, 119 insertions(+), 35 deletions(-) diff --git a/scripts/benchmarks.clj b/scripts/benchmarks.clj index 347afff..afcb465 100644 --- a/scripts/benchmarks.clj +++ b/scripts/benchmarks.clj @@ -112,8 +112,8 @@ (run-benchmark "transform values of a list" (transform ALL inc data) (doall (sequence (map inc) data)) - (reverse (into '() (map inc) data)) - )) + (reverse (into '() (map inc) data)))) + (let [data {:a 1 :b 2 :c 3 :d 4}] (run-benchmark "transform values of a small map" @@ -127,8 +127,8 @@ (into {} (map (fn [e] [(key e) (inc (val e))]) data)) (into {} (map (fn [e] [(key e) (inc (val e))])) data) (map-vals-map-iterable data inc) - (map-vals-map-iterable-transient data inc) - )) + (map-vals-map-iterable-transient data inc))) + (let [data (->> (for [i (range 1000)] [i i]) (into {}))] @@ -152,8 +152,8 @@ (first data) (select-any ALL data) (select-any FIRST data) - (select-first ALL data) - )) + (select-first ALL data))) + (let [data [1 2 3 4 5]] (run-benchmark "map a function over a vector" @@ -192,8 +192,8 @@ (run-benchmark "prepend to a vector" (vec (cons 0 data)) (setval BEFORE-ELEM 0 data) - (into [0] data) - )) + (into [0] data))) + (declarepath TreeValues) @@ -314,8 +314,8 @@ (map (fn [[k v]] [(keyword (str *ns*) (name k)) v])) data) (reduce-kv (fn [m k v] (assoc m (keyword (str *ns*) (name k)) v)) {} data) - (setval [MAP-KEYS NAMESPACE] (str *ns*) data) - )) + (setval [MAP-KEYS NAMESPACE] (str *ns*) data))) + (let [data (->> (for [i (range 1000)] [(keyword (str i)) i]) (into {}))] @@ -324,8 +324,8 @@ (map (fn [[k v]] [(keyword (str *ns*) (name k)) v])) data) (reduce-kv (fn [m k v] (assoc m (keyword (str *ns*) (name k)) v)) {} data) - (setval [MAP-KEYS NAMESPACE] (str *ns*) data) - )) + (setval [MAP-KEYS NAMESPACE] (str *ns*) data))) + (defnav walker-old [afn] (select* [this structure next-fn] @@ -336,8 +336,8 @@ (let [data {:a [1 2 {:c '(3 4) :d {:e [1 2 3] 7 8 9 10}}]}] (run-benchmark "walker vs. clojure.walk version" (transform (walker number?) inc data) - (transform (walker-old number?) inc data) - )) + (transform (walker-old number?) inc data))) + (let [size 1000 middle-idx (/ size 2) @@ -354,4 +354,29 @@ (run-benchmark "before-index at 0 vs. srange vs. cons (list)" (setval (before-index 0) v data-lst) (setval (srange 0 0) [v] data-lst) - (cons v data-lst))) + (cons v data-lst)) + (run-benchmark "set keypath and nthpath at index to NONE versus srange in middle (vector)" + (setval (nthpath middle-idx) NONE data-vec) + (setval (keypath middle-idx) NONE data-vec) + (setval (srange middle-idx (inc middle-idx)) [] data-vec)) + (run-benchmark "set keypath and nthpath at index to NONE versus srange in middle (list)" + ;; this case still needs to be optimized in nthpath* + (setval (nthpath middle-idx) NONE data-lst) + (setval (keypath middle-idx) NONE data-lst) + (setval (srange middle-idx (inc middle-idx)) [] data-lst)) + (run-benchmark "set keypath and nthpath at beginning to NONE versus srange and subvec (vector)" + (setval (nthpath 0) NONE data-vec) + (setval (keypath 0) NONE data-vec) + (setval (srange 0 1) [] data-vec) + (subvec data-vec 1)) + (run-benchmark "set keypath and nthpath at beginning to NONE versus srange and rest (list)" + ;; this case still needs to be optimized in nthpath* + (setval (nthpath 0) NONE data-lst) + (setval (keypath 0) NONE data-lst) + (setval (srange 0 1) [] data-lst) + (rest data-lst)) + (run-benchmark "set keypath and nthpath at end to NONE versus srange and subvec (vector)" + (setval (nthpath (dec size)) NONE data-vec) + (setval (keypath (dec size)) NONE data-vec) + (setval (srange (dec size) size) [] data-vec) + (subvec data-vec 0 (dec size)))) diff --git a/src/clj/com/rpl/specter/navs.cljc b/src/clj/com/rpl/specter/navs.cljc index 6465432..74511d5 100644 --- a/src/clj/com/rpl/specter/navs.cljc +++ b/src/clj/com/rpl/specter/navs.cljc @@ -492,9 +492,6 @@ (defprotocol FastEmpty (fast-empty? [s])) -(defprotocol InsertBeforeIndex - (insert-before-idx [aseq idx val])) - (defnav PosNavigator [getter updater] (select* [this structure next-fn] (if-not (fast-empty? structure) @@ -643,7 +640,29 @@ (nth s (-> s count dec)) )) +(defprotocol IndexedOps + "Fast indexed operations on sequential types" + (insert-before-idx [aseq idx val]) + (remove-at-idx [aseq idx])) + +;; helper fns for indexed operations +(defn- insert-before-index-list [lst idx v] + ;; an implementation that is most efficient for list style structures + (let [[front back] (split-at idx lst)] + (concat front (cons v back)))) + +(defn- remove-at-index-vec [aseq idx] + (condp = idx + 0 (subvec aseq 1) + (vec-count aseq) (subvec aseq 0 (vec-count aseq)) + (into (subvec aseq 0 idx) (subvec aseq (inc idx))))) +(defn- remove-at-index-list [lst idx] + ;; an implementation that is most efficient for list style structures + (condp = idx + 0 (rest lst) + (let [[front back] (split-at idx lst)] + (concat front (rest back))))) (extend-protocol FastEmpty nil @@ -664,7 +683,7 @@ (let [newv (next-fn vals (get structure key))] (if (identical? newv i/NONE) (if (sequential? structure) - (i/srange-transform* structure key (inc key) (fn [_] [])) + (remove-at-idx structure key) (dissoc structure key)) (assoc structure key newv)))) @@ -704,8 +723,8 @@ (if (vector? structure) (let [newv (next-fn vals (nth structure i))] (if (identical? newv i/NONE) - (i/srange-transform* structure i (inc i) (fn [_] [])) - (assoc structure i newv))) + (remove-at-index-vec structure i) + (assoc structure i newv))) (i/srange-transform* ; can make this much more efficient with alternate impl structure i @@ -726,35 +745,60 @@ (end-fn structure) )) -(defn- insert-before-index-list [lst idx val] - ;; an implementation that is most efficient for list style structures - (let [[front back] (split-at idx lst)] - (concat front (cons val back)))) - -(extend-protocol InsertBeforeIndex +(extend-protocol IndexedOps nil (insert-before-idx [_ idx val] (if (= 0 idx) (list val) (throw (ex-info "For a nil structure, can only insert before index 0" {:insertion-index idx})))) + (remove-at-idx [_ _] + ;; removing from nil structure at any index should just be nil? + nil) #?(:clj java.lang.String :cljs string) - (insert-before-idx [aseq idx val] - (apply str (insert-before-index-list aseq idx val))) + (insert-before-idx [aseq idx v] + (apply str (insert-before-index-list aseq idx v))) + (remove-at-idx [s idx] + (str (subs s 0 idx) (subs s idx))) #?(:clj clojure.lang.LazySeq :cljs cljs.core/LazySeq) - (insert-before-idx [aseq idx val] - (insert-before-index-list aseq idx val)) + (insert-before-idx [aseq idx v] + (insert-before-index-list aseq idx v)) + (remove-at-idx [aseq idx] + (remove-at-index-list aseq idx)) #?(:clj clojure.lang.IPersistentVector :cljs cljs.core/PersistentVector) (insert-before-idx [aseq idx val] (let [front (subvec aseq 0 idx) back (subvec aseq idx)] (into (conj front val) back))) + (remove-at-idx [aseq idx] + (remove-at-index-vec aseq idx)) + + ;; TODO: incorporate this into the transients namespace instead/in addition to? + #?(:clj clojure.lang.ITransientVector :cljs cljs.core/TransientVector) + (insert-before-idx [aseq idx val] + (loop [v aseq prev-val val curr-idx idx] + (if + (= curr-idx (transient-vec-count v)) + (assoc! v curr-idx prev-val) + (let [curr-val (nth v curr-idx)] + (recur (assoc! v curr-idx prev-val) curr-val (inc curr-idx)))))) + (remove-at-idx [aseq idx] + (loop [v aseq curr-idx idx] + (if + (< curr-idx (dec (transient-vec-count v))) + (let [next-val (nth v (inc curr-idx))] + (recur (assoc! v curr-idx next-val) (inc curr-idx))) + (pop! v)))) #?(:clj clojure.lang.IPersistentList :cljs cljs.core/List) (insert-before-idx [aseq idx val] - (cond (= idx 0) + (if (= idx 0) (cons val aseq) - :else (insert-before-index-list aseq idx val)))) + (insert-before-index-list aseq idx val))) + (remove-at-idx [aseq idx] + (if (= idx 0) + (rest aseq) + (remove-at-index-list aseq idx)))) diff --git a/test/com/rpl/specter/core_test.cljc b/test/com/rpl/specter/core_test.cljc index 1ba123c..59ccbf7 100644 --- a/test/com/rpl/specter/core_test.cljc +++ b/test/com/rpl/specter/core_test.cljc @@ -1,6 +1,6 @@ (ns com.rpl.specter.core-test #?(:cljs (:require-macros - [cljs.test :refer [is deftest]] + [cljs.test :refer [is deftest testing]] [clojure.test.check.clojure-test :refer [defspec]] [com.rpl.specter.cljs-test-helpers :refer [for-all+]] [com.rpl.specter.test-helpers :refer [ic-test]] @@ -13,7 +13,7 @@ defdynamicnav traverse-all satisfies-protpath? end-fn vtransform]])) (:use - #?(:clj [clojure.test :only [deftest is]]) + #?(:clj [clojure.test :only [deftest is testing]]) #?(:clj [clojure.test.check.clojure-test :only [defspec]]) #?(:clj [com.rpl.specter.test-helpers :only [for-all+ ic-test]]) #?(:clj [com.rpl.specter @@ -33,6 +33,7 @@ #?(:cljs [clojure.test.check.generators :as gen]) #?(:cljs [clojure.test.check.properties :as prop :include-macros true]) [com.rpl.specter :as s] + [com.rpl.specter.navs :as n] [com.rpl.specter.transients :as t] [clojure.set :as set])) @@ -1331,6 +1332,7 @@ (deftest nthpath-test (is (predand= vector? [1 2 -3 4] (transform (s/nthpath 2) - [1 2 3 4]))) (is (predand= vector? [1 2 4] (setval (s/nthpath 2) s/NONE [1 2 3 4]))) + (is (predand= vector? [1 2 3] (setval (s/nthpath 3) s/NONE [1 2 3 4]))) (is (predand= (complement vector?) '(1 -2 3 4) (transform (s/nthpath 1) - '(1 2 3 4)))) (is (predand= (complement vector?) '(1 2 4) (setval (s/nthpath 2) s/NONE '(1 2 3 4)))) (is (= [0 1 [2 4 4]] (transform (s/nthpath 2 1) inc [0 1 [2 3 4]]))) @@ -1702,3 +1704,16 @@ (is (satisfies-protpath? FooPP "a")) (is (not (satisfies-protpath? FooPP 1))) ))) + +;; adding a separate test because these are not yet exercised by the rest of the suite +(deftest indexed-opts-transient-vectors-test + (testing "IndexedOps fns work properly for transient vectors" + (let [v (vec (range 10))] + (doseq [[f args exp] [[n/remove-at-idx [0] (vec (range 1 10))] + [n/remove-at-idx [5] [0 1 2 3 4 6 7 8 9]] + [n/remove-at-idx [9] (vec (range 9))] + [n/insert-before-idx [0 -1] (vec (range -1 10))] + [n/insert-before-idx [7 -1] [0 1 2 3 4 5 6 -1 7 8 9]] + [n/insert-before-idx [10 10] (vec (range 11))]]] + (is (= exp (-> (apply f (cons (transient v) args)) + persistent!)))))))