Skip to content

Commit

Permalink
Added multimap.go file doc
Browse files Browse the repository at this point in the history
  • Loading branch information
abondrn committed Oct 31, 2023
1 parent f523651 commit 1270ec8
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 24 deletions.
33 changes: 20 additions & 13 deletions io/genbank/multimap.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
/*
This file provides utilities for working with a MultiMap,
which is simply a map which can store multiple values for a single key instead
of the usual one.
Useful for when we expect to encounter repeated keys but we want to keep all pairs,
not just the latest one, while preserving O(1) time lookup cost.
Does not make uniqueness guarantees for key value pairs.
This may end up being useful for other parsers which allow for repeated keys, in which case
this should be made into its own module.
*/
package genbank

// MultiMap is a map that can store multiple values for a single key.
// It is useful when repeated keys are expected and every pair must be kept,
// while key lookup remains an ordinary map access.
// It makes no uniqueness guarantees for values: the same key-value pair may
// be stored more than once.
// It is declared as a named type whose underlying type is a map of slices
// (a type definition, not a Go type alias).
// While not ideal (e.g. computing the total number of items takes O(N)),
// this has the advantage of being compatible with json.Marshal, cmp.Diff,
// pretty printing, and bracket indexing out of the box.
type MultiMap[K, V comparable] map[K][]V

// create a new empty multimap
Expand All @@ -11,15 +23,15 @@ func NewMultiMap[K, V comparable]() MultiMap[K, V] {
}

// adds one or more values under the given key in the multimap
func Put[K, V comparable](m MultiMap[K, V], k K, v V) {
func Put[K, V comparable](m MultiMap[K, V], k K, v ...V) {
if _, ok := m[k]; !ok {
m[k] = []V{v}
m[k] = v
} else {
m[k] = append(m[k], v)
m[k] = append(m[k], v...)
}
}

// iterates over the multimap, once for each key
// iterates over the multimap, once for each key with all values passed as a slice
func ForEachKey[K, V comparable](m MultiMap[K, V], do func(K, []V)) {
for k, values := range m {
do(k, values)
Expand All @@ -35,12 +47,7 @@ func ForEachValue[K, V comparable](m MultiMap[K, V], do func(K, V)) {
})
}

// returns number of unique keys
func KeyCount[K, V comparable](m MultiMap[K, V]) int {
return len(m)
}

// efficiently each element of a slice to create a new slice
// efficiently apply a transformation to each element of a slice to create a new slice
func MapSlice[X any, Y any](slice []X, mapper func(X) Y) []Y {
y := make([]Y, len(slice))
for i, x := range slice {
Expand Down
31 changes: 20 additions & 11 deletions io/polyjson/polyjson.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,9 @@ func Write(sequence Poly, path string) error {
return os.WriteFile(path, file, 0644)
}

// convert -> genbank
// TODO add convert <- genbank methods
// Utilities to convert polyjson objects -> their genbank equivalents
// TODO add convert <- genbank methods, which is currently difficult as most
// genbank Meta values are discarded due to lack of support for wildcard metadata in polyjson.

func (sequence *Poly) ToGenbank() genbank.Genbank {
gb := genbank.Genbank{
Expand All @@ -172,17 +173,26 @@ func (sequence *Poly) ToGenbank() genbank.Genbank {

func (meta *Meta) ToGenbank() genbank.Meta {
other := make(map[string]string)
other["URL"] = meta.URL
other["CreatedBy"] = meta.CreatedBy
other["CreatedWith"] = meta.CreatedWith
if meta.URL != "" {
other["URL"] = meta.URL
}
if meta.CreatedBy != "" {
other["CreatedBy"] = meta.CreatedBy
}
if meta.CreatedWith != "" {
other["CreatedWith"] = meta.CreatedWith
}
other["CreatedOn"] = meta.CreatedOn.String()
other["Schema"] = meta.Schema
if meta.Schema != "" {
other["Schema"] = meta.Schema
}
return genbank.Meta{
Definition: meta.Description,
Source: meta.CreatedBy,
Origin: meta.CreatedWith,
Name: meta.Name,
SequenceHash: meta.Hash,
Other: other,
}
}

Expand All @@ -204,18 +214,17 @@ func (feature *Feature) ToGenbank() genbank.Feature {
}

func (location *Location) ToGenbank() genbank.Location {
sublocations := make([]genbank.Location, len(location.SubLocations))
for i, s := range location.SubLocations {
sublocations[i] = s.ToGenbank()
}
loc := genbank.Location{
Start: location.Start,
End: location.End,
Complement: location.Complement,
Join: location.Join,
FivePrimePartial: location.FivePrimePartial,
ThreePrimePartial: location.ThreePrimePartial,
SubLocations: sublocations,
SubLocations: genbank.MapSlice(
location.SubLocations,
func(s Location) genbank.Location { return s.ToGenbank() },
),
}
loc.GbkLocationString = genbank.BuildLocationString(loc)
return loc
Expand Down

0 comments on commit 1270ec8

Please sign in to comment.