Skip to content

Commit

Permalink
update BinarySearch, Shuffle, AdjacencyGraph, and others
Browse files Browse the repository at this point in the history
  • Loading branch information
shwestrick committed Jul 10, 2022
1 parent a52fc47 commit 2b0d469
Show file tree
Hide file tree
Showing 11 changed files with 501 additions and 72 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,15 @@ Sequences
* structure [`DelayedSeq`](doc/DelayedSeq.md)
* structure [`SeqBasis`](doc/SeqBasis.md)

Sorting
Sorting and Permutations
* structure [`Merge`](doc/Merge.md)
* structure [`StableMerge`](doc/StableMerge.md)
* structure [`StableSort`](doc/StableSort.md)
* structure [`Mergesort`](doc/Mergesort.md)
* structure [`SampleSort`](doc/SampleSort.md)
* structure [`CountingSort`](doc/CountingSort.md)
* structure [`Quicksort`](doc/Quicksort.md)
* structure [`Shuffle`](doc/Shuffle.md)

Searching
* structure [`BinarySearch`](doc/BinarySearch.md)
Expand Down
61 changes: 61 additions & 0 deletions doc/BinarySearch.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,64 @@ according to `cmp`, then this will find the "leftmost" one.
**Requires** the input sequence must be sorted w.r.t. `cmp`.

Logarithmic work and span.


```sml
val searchPosition: 'a Seq.t -> ('a -> order) -> int
```

`searchPosition s cmpTargetAgainst` finds a target position in the sequence
by using `cmpTargetAgainst` to point towards the target position. This is
useful when you aren't looking for a specific element, but some location
within a sequence. Note that this is more general than the plain `search`
function, because we can implement `search` in terms of
`searchPosition` as follows:
`fun search cmp s x = searchPosition s (fn y => cmp (x, y))`.

**Requires** the input sequence must be sorted w.r.t. `cmpTargetAgainst`.

Logarithmic work and span.


## Examples

Suppose `table: (key * value) seq` represents a mapping from keys to values,
and it is sorted by key. Here we use `searchPosition` to look up the value
associated with a particular key `target`:
```sml
fun lookup
{ table: (key * value) Seq.t
, keyCmp: key * key -> order
, target: key
}
: value option
=
let
val n = Seq.length table
(** result of this call is an idx such that table[idx] contains the
* target key, if the table contains the target key.
*)
val idx = BinarySearch.searchPosition table (fn k => keyCmp (target, k))
in
(** now we need to check if the table actually contains the key *)
if idx = n then
(** In this case, the target position is at the end of the sequence,
* i.e., it is larger than any key in the table. So this key is
* NOT in the table.
*)
NONE
else
(** In this case, the target position is somewhere in the middle of
* the sequence. It may or may not be in the table though; we need to
* inspect the key that is at table[idx]
*)
let
val (k, v) = Seq.nth table idx
in
case keyCmp (target, k) of
EQUAL => SOME v
| _ => NONE
end
end
```
14 changes: 14 additions & 0 deletions doc/Shuffle.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# structure Shuffle

```sml
val shuffle: 'a Seq.t -> int -> 'a Seq.t
```

`shuffle s seed` produces a pseudorandom permutation of `s` based on the
random seed `seed`.

For a particular seed, it will always produce
the same result. Any two shuffles (using two different seeds) are independent.
E.g. `shuffle s seed` is independent of `shuffle s (seed+1)`.

Linear work and polylogarithmic span.
74 changes: 74 additions & 0 deletions lib/github.com/mpllang/mpllib/AdjacencyGraph.sml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,80 @@ struct
val maxVal = toInt (valOf maxInt)
end

structure VertexSubset =
struct
datatype h = SPARSE of Vertex.t Seq.t | DENSE of int Seq.t
type t = h * int
exception BadRep

fun empty thresh = (SPARSE (Seq.empty()), thresh)

fun size (vs, thresh) =
case vs of
SPARSE s => Seq.length s
| DENSE s => Seq.reduce op+ 0 s

fun plugOnes s positions =
(Seq.foreach positions (fn (i, v) => AS.update (s, Vertex.toInt v, 1)))

fun append (vs, threshold) s n =
case vs of
SPARSE ss =>
if (Seq.length ss) + (Seq.length s) > threshold then
let
val dense_rep = Seq.tabulate (fn x => 0) n
val _ = plugOnes dense_rep ss
val _ = plugOnes dense_rep s
in
(DENSE (dense_rep), threshold)
end
else (SPARSE(Seq.append (ss, s)), threshold)
| DENSE ss => (plugOnes ss s; (DENSE ss, threshold))

fun sparse_to_dense vs n =
case vs of
SPARSE s =>
let
val dense_rep = Seq.tabulate (fn x => 0) n
val _ = Seq.foreach s (fn (i, v) => AS.update (dense_rep, Vertex.toInt v, 1))
in
DENSE (dense_rep)
end
| DENSE _ => raise BadRep

fun dense_to_sparse vs =
case vs of
SPARSE _ => raise BadRep
| DENSE s =>
let
val (offsets, total) = Seq.scan op+ 0 s
val sparse = ForkJoin.alloc total
val _ = Seq.foreach s (fn (i, v) =>
if (v=1) then A.update (sparse, Seq.nth offsets i, Vertex.fromInt i)
else if (v = 0) then ()
else raise BadRep
)
in
SPARSE (AS.full sparse)
end

fun from_sparse_rep s threshold n =
if (Seq.length s) < threshold then (SPARSE (s), threshold)
else (sparse_to_dense (SPARSE (s)) n, threshold)

fun from_dense_rep s countopt threshold =
let
val count =
case countopt of
SOME x => x
| NONE => Seq.reduce op+ 0 s
val d = DENSE(s)
in
if count < threshold then (dense_to_sparse(d), threshold)
else (d, threshold)
end
end

type vertex = Vertex.t
fun vertexNth s v = Seq.nth s (Vertex.toInt v)
fun vToWord v = Word64.fromInt (Vertex.toInt v)
Expand Down
173 changes: 104 additions & 69 deletions lib/github.com/mpllang/mpllib/AdjacencyInt.sml
Original file line number Diff line number Diff line change
Expand Up @@ -4,114 +4,149 @@ struct

structure G = AdjacencyGraph(Int)
structure AS = ArraySlice
structure DS = DelayedSeq
open G.VertexSubset


(* fun sumOfOutDegrees frontier =
SeqBasis.reduce 10000 op+ 0 (0, Seq.length frontier) (degree o Seq.nth frontier)
(* DS.reduce op+ 0 (DS.map degree (DS.fromArraySeq frontier)) *)
fun shouldProcessDense frontier =
let
val n = Seq.length frontier
val m = sumOfOutDegrees frontier
in
n + m > denseThreshold
end *)

fun should_process_sparse g V =
fun should_process_sparse g n =
let
val denseThreshold = G.numEdges g div 20
val totalOutDegree =
SeqBasis.reduce 10000 op+ 0 (0, Seq.length V) (G.degree g o Seq.nth V)
val n = Seq.length V
val m = totalOutDegree
val deg = Int.div (G.numEdges g, G.numVertices g)
val count = (1 + deg) * n
in
n + m <= denseThreshold
count <= denseThreshold
end


fun edge_map_dense g vertices f h =
let
val inFrontier = Seq.tabulate (fn _ => false) (G.numVertices g)
val _ = Seq.foreach vertices (fn (_, v) =>
ArraySlice.update (inFrontier, v, true))
val inFrontier = vertices
val n = Seq.length vertices
val res = Seq.tabulate (fn _ => 0) n

fun processVertex v =
if not (h v) then NONE
if not (h v) then 0
else
let
val neighbors = G.neighbors g v
fun loop i =
if i >= Seq.length neighbors then NONE else
if i >= Seq.length neighbors then 0 else
let val u = Seq.nth neighbors i
in
if not (Seq.nth inFrontier u) then
if not (Seq.nth inFrontier u = 1) then
loop (i+1)
else
case f (u, v) of
NONE => loop (i+1)
| SOME x => SOME x
case f (u, v) of
NONE => loop (i+1)
| SOME x => (AS.update (res, x, 1); 1)
end
in
loop 0
end
val count = SeqBasis.reduce 1000 op+ 0 (0, n) processVertex
in
AS.full (SeqBasis.tabFilter 100 (0, G.numVertices g) processVertex)
(res, count)
end


fun edge_map_sparse g vertices f h =
let
fun app_vertex u =
val n = Seq.length vertices
fun ui uidx = Seq.nth vertices uidx
val r =
SeqBasis.scan 1000 op+ 0 (0, n) (G.degree g o ui)
val (offsets, totalOutDegree) = (AS.full r, Array.sub (r, n))
val store = ForkJoin.alloc totalOutDegree
val k = 100
val numBlocks = 1 + (totalOutDegree-1) div k
fun map_block i =
let
val all_ngbrs = (G.neighbors g u)
fun ds i = let
val v = Seq.nth all_ngbrs i
in
if h (v) then f (u, v)
else NONE
end
val m = SeqBasis.tabFilter 10000 (0, Seq.length all_ngbrs) ds
val lo = i*k
val hi = Int.min((i+1)*k, totalOutDegree)
val ulo =
let
val a = BinarySearch.search (Int.compare) offsets lo
in
if (Seq.nth offsets a) > lo then a - 1
else a
end
fun map_seq idx (u, uidx) count =
if idx >= hi then count
else if idx >= (Seq.nth offsets (uidx + 1)) then map_seq idx (ui (uidx + 1), uidx + 1) count
else
let
val v = Seq.nth (G.neighbors g u) (idx - (Seq.nth offsets uidx))
in
if (h v) then
case f (u, v) of
SOME x => (Array.update (store, lo + count, x); map_seq (idx + 1) (u, uidx) (count + 1))
| NONE => (map_seq (idx + 1) (u, uidx) count)
else
(map_seq (idx + 1) (u, uidx) count)
end
in
DS.fromArraySeq (AS.full m)
map_seq lo (ui ulo, ulo) 0
end
val counts = SeqBasis.tabulate 1 (0, numBlocks) map_block
val outOff = SeqBasis.scan 10000 op+ 0 (0, numBlocks) (fn i => Array.sub (counts, i))
val outSize = Array.sub (outOff, numBlocks)
val result = ForkJoin.alloc outSize
in
DS.toArraySeq (DS.flatten (DS.map app_vertex (DS.fromArraySeq vertices)))
ForkJoin.parfor (totalOutDegree div (Int.max (outSize, 1))) (0, numBlocks) (fn i =>
let
val soff = i * k
val doff = Array.sub (outOff, i)
val size = Array.sub (outOff, i+1) - doff
in
Util.for (0, size) (fn j =>
Array.update (result, doff+j, Array.sub (store, soff+j)))
end);
(AS.full result)
end

fun edge_map g V f h =
if should_process_sparse g V then
edge_map_sparse g V f h
else
edge_map_dense g V f h
fun edge_map g (vs, threshold) (fpar, f) h =
case vs of
SPARSE s =>
from_sparse_rep (edge_map_sparse g s fpar h) threshold (G.numVertices g)
| DENSE s =>
let
val (res, count) = edge_map_dense g s f h
in
from_dense_rep res (SOME count) threshold
end

fun contract clusters g =
let
val n = G.numVertices g
val vertices = Seq.tabulate (fn u => u) n
val has_neighbor = Seq.tabulate (fn i => 0) n
fun vertex_foreach g (vs, threshold) f =
case vs of
SPARSE s =>
Seq.foreach s (fn (i, u) => f u)
| DENSE s =>
Seq.foreach s (fn (i, b) => if (b = 1) then (f i) else ())

fun upd (u, v) =
fun vertex_map_ g (vs, threshold) f =
case vs of
SPARSE s =>
let
val (cu, cv) = ((Seq.nth clusters u), (Seq.nth clusters v))
val s' =
AS.full (SeqBasis.tabFilter 1000 (0, Seq.length s)
(fn i =>
let
val u = Seq.nth s i
val b = f u
in
if b then SOME u
else NONE
end
))
in
if cu = cv then NONE
else (AS.update (has_neighbor, cu, 1); SOME (cu, cv))
(from_sparse_rep s' threshold (G.numVertices g))
end
val sorted_edges = G.dedupEdges (edge_map g vertices upd (fn _ => true))
val (vmap, num_taken) = Seq.scan Int.+ 0 has_neighbor
val new_sorted_edges = Seq.map (fn (x, y) => (Seq.nth vmap x, Seq.nth vmap y)) sorted_edges

fun new_label c =
| DENSE s =>
let
val is_taken = (Seq.nth has_neighbor c) = 1
val num_taken_left = Seq.nth vmap c
val res =
Seq.map (fn i => if (Seq.nth s i = 1) andalso f i then 1 else 0) s
in
if is_taken then num_taken_left
else num_taken + (c - num_taken_left)
from_dense_rep res NONE threshold
end
in
(G.fromSortedEdges new_sorted_edges, new_label)
end


fun vertex_map g vs f needOut =
if needOut then vertex_map_ g vs f
else (vertex_foreach g vs; vs)

end
Loading

0 comments on commit 2b0d469

Please sign in to comment.