Skip to content

Latest commit

 

History

History
91 lines (87 loc) · 2.89 KB

3D.md

File metadata and controls

91 lines (87 loc) · 2.89 KB

完美Hash

  对于已知的数据集,我们总是有办法找到一个Hash函数,将其映射到容量不少于数据集元素个数的表中。这种情况称为完美Hash。

BDZ算法

  BDZ算法的核心思想在于通过尝试不同的hash种子,找到一个满足多路Hash表的无冲突填充方式。

func (h *bdz) init(keys []string) bool {
    //...
    for trys := 0; trys < 8; trys++ {
        h.seed = rand32()
        if !graph.init(keys, h.hash) { //根据随机种子生成超图
            return false
        }
        array.SetAll(bitmap[:ebmSize], 0)
        free = graph.tear(free, bitmap) //拆解超图获得边序列
        if len(free) != len(graph.edges) {
            continue
        }
        array.SetAll(bitmap[:nbmSize], 0)
        h.mapping(graph.edges, free, bitmap)
        return true
    }
    return false
}

具体实现中通常采用三路Hash表,以表中槽位为点,元素为边,可以构成一种超图。

type vertex struct {   //链表体
    slot uint32
    next uint32
}
type list struct {     //链表头
    size uint8
    head uint32
}
type hypergraph struct {
    edges [][3]vertex //超图的边,每条边三个顶点
    nodes []list      //超图的点,表现为链表
}

func (g *hypergraph) init(keys []string, hash func(string) [3]uint32) bool {
    array.SetAll(g.nodes, list{0, endIdx})
    for i := 0; i < len(keys); i++ {
        slots := hash(keys[i])
        for j := 0; j < 3; j++ {
            v := &g.edges[i][j]
            v.slot = slots[j]
            n := &g.nodes[v.slot]
            v.next = n.head
            n.head = uint32(i)
            n.size++
            if n.size > 50 { //扎堆是不正常的
                return false
    }   }   }
    return true
}

在拆解超图的过程中,保证每拆一条边能够切实解放至少一个点,也就意味着多路Hash表中某个元素可以获得槽位。

func (g *hypergraph) tear(free []uint32, bitmap []uint32) []uint32 {
    free, head := free[:0], 0
    for i := 0; i < len(g.edges); i++ {
        for j := 0; j < 3; j++ {
            n := g.nodes[g.edges[i][j].slot]
            if n.size == 1 && testAndSetBit(bitmap, n.head) {
                free = append(free, n.head)
                break
    }   }   }
    for head < len(free) {
        curr := free[head]
        head++
        for j := 0; j < 3; j++ {
            v := &g.edges[curr][j]
            n := &g.nodes[v.slot]
            p := &n.head
            for *p != curr {
                p = &g.edges[*p][j].next
            }
            *p = v.next
            v.next = endIdx
            n.size--
            if n.size == 1 && testAndSetBit(bitmap, n.head) {
                free = append(free, n.head)
    }   }   }
    return free
}

目录 上一节 下一节