From 2ba748ddc29dab2e27e5d417a933991daaa67d17 Mon Sep 17 00:00:00 2001 From: Dusan Kasan Date: Mon, 1 May 2017 14:25:59 +0200 Subject: [PATCH] Initial commit --- CHANGELOG.md | 3 + CONTRIBUTING.md | 19 ++ LICENSE.md | 21 +++ README.md | 69 ++++++++ hashmap.go | 458 ++++++++++++++++++++++++++++++++++++++++++++++++ hashmap_test.go | 119 +++++++++++++ 6 files changed, 689 insertions(+) create mode 100644 CHANGELOG.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE.md create mode 100644 README.md create mode 100644 hashmap.go create mode 100644 hashmap_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4975519 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,3 @@ +# Changelog + +## No versions tagged yet \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..046d839 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,19 @@ +## How to contribute + +This project is open to contribution from anyone, as long as you cover your changes with tests. Your pull requests will be merged after your code passes CI and manual code review. + +Every change merges to master. No development is done in other branches. 
+ +## Typical contribution use case + +- You need a feature that is not implemented yet +- Search for open/closed issues relating to what you need +- If you don't find anything, create a new issue +- Fork this repository and create the fix/feature in the fork +- Write tests for your change +- If you changed the API, document the change in the README +- Create a pull request, describe what you did +- Wait for CI to verify you didn't break anything + - If you did, rewrite it +- If CI passes, wait for manual review by the repo's owner +- Your pull request will be merged into master \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..582f948 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 Dusan Kasan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e8cd424 --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +# Hashmap + +A [Red-Black Tree](https://en.wikipedia.org/wiki/Red%E2%80%93black_tree) [Hash Map](https://en.wikipedia.org/wiki/Hash_table) implementation in Golang that uses user-supplied hashing algorithms. + +## Usage + +The hashmap supports the classic Get, Insert, Remove operations you'd expect. + +### Inserting +```go +//hash function must take interface{} and return int64 +hashFunc := func(i interface{}) int64 { + return int64(i.(int)) +} + +m := hashmap.New(hashFunc) +//insertion of key, value. keep in mind the key will be used as input to your hashFunc +m.Insert(4, 0) +//you can store different types +m.Insert(19, "hello") +//panics, because the hashFunc doesn't support string keys +m.Insert("fail", "oh no") +``` + +### Selecting +```go +hashFunc := func(i interface{}) int64 { + return int64(i.(int)) +} + +m := hashmap.New(hashFunc) +m.Insert(4, 0) +m.Insert(19, 10) + +//returns value as interface{} and found flag (true if the key was found) +value, found := m.Get(19) +// found will be false +value, found = m.Get(123) +``` + +### Removing +```go +hashFunc := func(i interface{}) int64 { + return int64(i.(int)) +} + +m := hashmap.New(hashFunc) +m.Insert(4, 0) +m.Insert(19, 10) + +//returns found flag (true if the key was found) +found := m.Remove(19) +// found will be false +found = m.Remove(123) +``` + +## Type safety concerns + +As this hash map supports keys and values of any type (by type hinting interface{}), there could be concerns of type safety and runtime problems. The suggested way to work around this is to wrap the hash map into type-specific proxy with methods such as `Get(key KeyType) (value ValueType, found bool)` and do the type assertions there. + +Direct support for code generation by this package is still considered but not yet implemented. 
+
+## TODO
+
+- implement as thread safe
+- threadsafety: don't lock the whole tree but separate nodes?
+- CI
+- Performance optimizations
+- Performance tests and docs
\ No newline at end of file
diff --git a/hashmap.go b/hashmap.go
new file mode 100644
index 0000000..ec9c956
--- /dev/null
+++ b/hashmap.go
@@ -0,0 +1,528 @@
+// Package hashmap implements a hash map backed by a red-black tree that is
+// ordered by the hashes produced by a user-supplied hashing function.
+package hashmap
+
+// color is the colour of a red-black tree node.
+type color bool
+
+const (
+	black color = false
+	red   color = true
+)
+
+// matchPosition describes where a hash lies relative to a node's hash.
+type matchPosition int8
+
+const (
+	greater matchPosition = 1
+	same    matchPosition = 0
+	lower   matchPosition = -1
+)
+
+// rbTreeNode is a single tree node. A black node without children acts as a
+// leaf sentinel (see isLeaf); every entry node is created with two of them.
+type rbTreeNode struct {
+	color color
+
+	keyHash int64
+	key     interface{}
+	value   interface{}
+
+	parent *rbTreeNode
+
+	left  *rbTreeNode
+	right *rbTreeNode
+
+	// collisions holds the other entries whose keys hash to keyHash.
+	collisions map[interface{}]interface{}
+}
+
+// rbTree is the hash map: a red-black tree of entries ordered by key hash.
+type rbTree struct {
+	root     *rbTreeNode
+	hashFunc func(interface{}) int64
+}
+
+// New returns an empty hash map that hashes its keys with hashFunc.
+func New(hashFunc func(i interface{}) int64) *rbTree {
+	return &rbTree{hashFunc: hashFunc}
+}
+
+// Insert stores value under key, replacing any value already stored under
+// that key. Keys with equal hashes share one tree node.
+func (rb *rbTree) Insert(key, value interface{}) {
+	keyHash := rb.hashFunc(key)
+
+	var parent *rbTreeNode
+	position := same
+	if rb.root != nil {
+		// find insertion parent and position where we should place child
+		parent, position = findInsertionParent(rb.root, keyHash)
+
+		if position == same {
+			// the hash is already in the tree, no new node is needed
+			if key == parent.key {
+				parent.value = value
+			} else {
+				if parent.collisions == nil {
+					parent.collisions = map[interface{}]interface{}{}
+				}
+				parent.collisions[key] = value
+			}
+
+			return
+		}
+	}
+
+	child := &rbTreeNode{
+		color:      red,
+		keyHash:    keyHash,
+		key:        key,
+		value:      value,
+		parent:     parent,
+		left:       &rbTreeNode{},
+		right:      &rbTreeNode{},
+		collisions: map[interface{}]interface{}{},
+	}
+
+	// insert the child node; an empty tree has no parent to attach it to
+	switch position {
+	case greater:
+		parent.right = child
+	case lower:
+		parent.left = child
+	}
+
+	insertCase1(child)
+
+	// rebalancing may have rotated another node to the top: crawl up to it
+	for child.parent != nil {
+		child = child.parent
+	}
+	rb.root = child
+}
+
+// insertCase1 repaints node black when it is the root, otherwise moves on to
+// the next insertion case.
+func insertCase1(node *rbTreeNode) {
+	if node.parent == nil {
+		node.color = black
+		return
+	}
+
+	insertCase2(node)
+}
+
+// insertCase2 stops when the parent is black, as the tree is still valid.
+func insertCase2(node *rbTreeNode) {
+	if node.parent.color == black {
+		return
+	}
+
+	insertCase3(node)
+}
+
+// insertCase3 handles a red parent with a red uncle: both are repainted black,
+// the grandparent red, and the rebalancing restarts from the grandparent.
+func insertCase3(node *rbTreeNode) {
+	uncle := getUncle(node)
+	if uncle != nil && uncle.color == red {
+		node.parent.color = black
+		uncle.color = black
+		grandparent := getGrandparent(node)
+		grandparent.color = red
+		insertCase1(grandparent)
+		return
+	}
+
+	insertCase4(node)
+}
+
+// insertCase4 rotates an inner grandchild (left-right or right-left) to the
+// outside so that insertCase5 can finish the rebalancing.
+func insertCase4(node *rbTreeNode) {
+	grandparent := getGrandparent(node)
+
+	if node == node.parent.right && node.parent == grandparent.left {
+		rotateLeft(node.parent)
+		node = node.left
+	} else if node == node.parent.left && node.parent == grandparent.right {
+		rotateRight(node.parent)
+		node = node.right
+	}
+
+	insertCase5(node)
+}
+
+// insertCase5 repaints the parent black and the grandparent red, then rotates
+// the grandparent away from node's side.
+func insertCase5(node *rbTreeNode) {
+	grandparent := getGrandparent(node)
+	node.parent.color = black
+	grandparent.color = red
+	if node == node.parent.left {
+		rotateRight(grandparent)
+	} else {
+		rotateLeft(grandparent)
+	}
+}
+
+// Get returns the value stored under key. found is false when the map does
+// not contain key.
+func (rb *rbTree) Get(key interface{}) (value interface{}, found bool) {
+	if rb.root == nil {
+		return nil, false
+	}
+
+	node, found := findByKeyHash(rb.root, key, rb.hashFunc(key))
+	if !found {
+		return nil, false
+	}
+
+	if node.key == key {
+		return node.value, true
+	}
+
+	value, found = node.collisions[key]
+	return value, found
+}
+
+// Remove deletes key from the map. found is false when the map does not
+// contain key, in which case the map is left untouched.
+func (rb *rbTree) Remove(key interface{}) (found bool) {
+	if rb.root == nil {
+		return false
+	}
+
+	node, found := findByKeyHash(rb.root, key, rb.hashFunc(key))
+	if !found {
+		return false
+	}
+
+	if key != node.key {
+		// the hash belongs to a node owned by another key, so key can only
+		// be one of that node's collisions
+		if _, found = node.collisions[key]; found {
+			delete(node.collisions, key)
+		}
+
+		return found
+	}
+
+	if len(node.collisions) > 0 {
+		// keep the node and promote an arbitrary collision to be its entry
+		for k, v := range node.collisions {
+			node.key = k
+			node.value = v
+			delete(node.collisions, k)
+			break
+		}
+
+		return true
+	}
+
+	// return a node with at most one non leaf child that should be used to replace node
+	replacementNode := getReplacementNode(node)
+	// copy the replacement value into original
+	copyNodeValue(replacementNode, node)
+
+	// select the replacement node's child
+	replacementNodeChild := replacementNode.right
+	if isLeaf(replacementNodeChild) {
+		replacementNodeChild = replacementNode.left
+	}
+
+	// replace the replacementNode with its child
+	replacementNodeChild.parent = replacementNode.parent
+	if replacementNode.parent != nil {
+		if replacementNode == replacementNode.parent.left {
+			replacementNode.parent.left = replacementNodeChild
+		} else {
+			replacementNode.parent.right = replacementNodeChild
+		}
+	}
+
+	// if it was red we don't care
+	if replacementNode.color == red {
+		return true
+	}
+
+	// if it was black and the new node is red, repaint the new node to black, preserves black depth
+	if replacementNodeChild.color == red {
+		replacementNodeChild.color = black
+		return true
+	}
+
+	deleteCase1(replacementNodeChild)
+
+	// crawl to root and assign it
+	for replacementNodeChild.parent != nil {
+		replacementNodeChild = replacementNodeChild.parent
+	}
+	rb.root = replacementNodeChild
+
+	// removing the last entry leaves only a leaf sentinel behind
+	if isLeaf(rb.root) {
+		rb.root = nil
+	}
+
+	return true
+}
+
+// isLeaf reports whether node is a leaf sentinel: black and without children.
+func isLeaf(node *rbTreeNode) bool {
+	return node.left == nil && node.right == nil && node.color == black
+}
+
+// deleteCase1 finishes when node is the new root, otherwise moves on to the
+// next deletion case.
+func deleteCase1(node *rbTreeNode) {
+	if node.parent != nil {
+		deleteCase2(node)
+	}
+}
+
+// deleteCase2 handles a red sibling: the sibling and the parent swap colours
+// and the parent is rotated towards node.
+func deleteCase2(node *rbTreeNode) {
+	sibling := getSibling(node)
+
+	if sibling.color == red {
+		node.parent.color = red
+		sibling.color = black
+
+		if node == node.parent.left {
+			rotateLeft(node.parent)
+		} else {
+			rotateRight(node.parent)
+		}
+	}
+
+	deleteCase3(node)
+}
+
+// deleteCase3 handles a black parent and a black sibling with black children:
+// the sibling is repainted red and the rebalancing restarts from the parent.
+func deleteCase3(node *rbTreeNode) {
+	sibling := getSibling(node)
+	if node.parent.color == black && sibling.color == black && sibling.left.color == black && sibling.right.color == black {
+		sibling.color = red
+		deleteCase1(node.parent)
+	} else {
+		deleteCase4(node)
+	}
+}
+
+// deleteCase4 handles a red parent and a black sibling with black children:
+// the parent and the sibling swap colours.
+func deleteCase4(node *rbTreeNode) {
+	sibling := getSibling(node)
+	if node.parent.color == red && sibling.color == black && sibling.left.color == black && sibling.right.color == black {
+		sibling.color = red
+		node.parent.color = black
+	} else {
+		deleteCase5(node)
+	}
+}
+
+// deleteCase5 handles a black sibling whose red child is the one nearer to
+// node: the sibling is rotated so that the red child ends up on the far side,
+// which is what deleteCase6 expects.
+func deleteCase5(node *rbTreeNode) {
+	sibling := getSibling(node)
+	if sibling.color == black {
+		if node.parent.left == node && sibling.right.color == black && sibling.left.color == red {
+			sibling.color = red
+			sibling.left.color = black
+			rotateRight(sibling)
+		} else if node.parent.right == node && sibling.right.color == red && sibling.left.color == black {
+			sibling.color = red
+			sibling.right.color = black
+			rotateLeft(sibling)
+		}
+	}
+
+	deleteCase6(node)
+}
+
+// deleteCase6 gives the sibling the parent's colour, repaints the parent and
+// the sibling's far child black and rotates the parent towards node.
+func deleteCase6(node *rbTreeNode) {
+	sibling := getSibling(node)
+
+	sibling.color = node.parent.color
+	node.parent.color = black
+
+	if node == node.parent.left {
+		sibling.right.color = black
+		rotateLeft(node.parent)
+	} else {
+		sibling.left.color = black
+		rotateRight(node.parent)
+	}
+}
+
+// copyNodeValue copies the entry held by fromNode (key, hash, value and
+// collisions) into toNode; colours and links are left alone.
+func copyNodeValue(fromNode *rbTreeNode, toNode *rbTreeNode) {
+	// todo: optimize this to just do pointer magic, instead of copying values
+	toNode.key = fromNode.key
+	toNode.keyHash = fromNode.keyHash
+	toNode.value = fromNode.value
+	toNode.collisions = fromNode.collisions
+}
+
+// getReplacementNode returns the node that gets unlinked in place of node:
+// its in-order successor, else its in-order predecessor, else node itself
+// when both of its children are leaves.
+func getReplacementNode(node *rbTreeNode) *rbTreeNode {
+	if !isLeaf(node.right) {
+		return getLeftmostNode(node.right)
+	} else if !isLeaf(node.left) {
+		return getRightmostNode(node.left)
+	}
+
+	return node
+}
+
+// getLeftmostNode returns the leftmost non-leaf node of the subtree at node.
+func getLeftmostNode(node *rbTreeNode) *rbTreeNode {
+	for !isLeaf(node.left) {
+		node = node.left
+	}
+
+	return node
+}
+
+// getRightmostNode returns the rightmost non-leaf node of the subtree at node.
+func getRightmostNode(node *rbTreeNode) *rbTreeNode {
+	for !isLeaf(node.right) {
+		node = node.right
+	}
+
+	return node
+}
+
+// findByKeyHash searches the subtree at node for the node holding keyHash.
+// key is not used by the search: telling the node's own key from its
+// collisions is left to the caller.
+func findByKeyHash(node *rbTreeNode, key interface{}, keyHash int64) (res *rbTreeNode, found bool) {
+	for node != nil && !isLeaf(node) {
+		switch {
+		case keyHash > node.keyHash:
+			node = node.right
+		case keyHash < node.keyHash:
+			node = node.left
+		default:
+			return node, true
+		}
+	}
+
+	return nil, false
+}
+
+// rotateLeft rotates the subtree at root to the left: root's right child takes
+// root's place and root becomes its left child. Nothing happens when the right
+// child is a leaf.
+func rotateLeft(root *rbTreeNode) {
+	pivot := root.right
+	if isLeaf(pivot) {
+		return
+	}
+
+	rootParent := root.parent
+	if rootParent != nil && rootParent.left == root {
+		rootParent.left = pivot
+	} else if rootParent != nil && rootParent.right == root {
+		rootParent.right = pivot
+	}
+
+	pivotLeftChild := pivot.left
+	pivot.parent = rootParent
+	root.parent = pivot
+	pivot.left = root
+	root.right = pivotLeftChild
+	pivotLeftChild.parent = root
+}
+
+// rotateRight rotates the subtree at root to the right: root's left child
+// takes root's place and root becomes its right child. Nothing happens when
+// the left child is a leaf.
+func rotateRight(root *rbTreeNode) {
+	pivot := root.left
+	if isLeaf(pivot) {
+		return
+	}
+
+	rootParent := root.parent
+	if rootParent != nil && rootParent.right == root {
+		rootParent.right = pivot
+	} else if rootParent != nil && rootParent.left == root {
+		rootParent.left = pivot
+	}
+
+	pivotRightChild := pivot.right
+	pivot.parent = rootParent
+	root.parent = pivot
+	pivot.right = root
+	root.left = pivotRightChild
+	pivotRightChild.parent = root
+}
+
+// findInsertionParent walks down from n looking for keyHash. It returns the
+// node holding keyHash together with same, or else the node a new child has
+// to be attached to together with the side: greater (right) or lower (left).
+func findInsertionParent(n *rbTreeNode, keyHash int64) (*rbTreeNode, matchPosition) {
+	for {
+		switch {
+		case keyHash > n.keyHash:
+			if isLeaf(n.right) {
+				return n, greater
+			}
+			n = n.right
+		case keyHash < n.keyHash:
+			if isLeaf(n.left) {
+				return n, lower
+			}
+			n = n.left
+		default:
+			return n, same
+		}
+	}
+}
+
+// getGrandparent returns the parent of n's parent, or nil if there is none.
+func getGrandparent(n *rbTreeNode) *rbTreeNode {
+	if n.parent == nil {
+		return nil
+	}
+
+	return n.parent.parent
+}
+
+// getUncle returns the sibling of n's parent, or nil if n has no grandparent.
+func getUncle(n *rbTreeNode) *rbTreeNode {
+	g := getGrandparent(n)
+	if g == nil {
+		return nil
+	}
+
+	if n.parent == g.left {
+		return g.right
+	}
+
+	return g.left
+}
+
+// getSibling returns the other child of n's parent, or nil if n is the root.
+func getSibling(n *rbTreeNode) *rbTreeNode {
+	if n.parent == nil {
+		return nil
+	}
+
+	if n.parent.left == n {
+		return n.parent.right
+	}
+
+	return n.parent.left
+}
diff --git a/hashmap_test.go b/hashmap_test.go
new file mode 100644
index 0000000..0e56a14
--- /dev/null
+++ b/hashmap_test.go
@@ -0,0 +1,115 @@
+package hashmap_test
+
+import (
+	"math/rand"
+	"testing"
+	"time"
+
+	"github.com/DusanKasan/hashmap"
+)
+
+func TestHashmap(t *testing.T) {
+	rand.Seed(time.Now().UnixNano())
+
+	// hash function that causes collisions
+	hashFunc := func(i interface{}) int64 {
+		v := i.(int64)
+		if v != 0 && v%5 == 0 {
+			return v - 1
+		}
+
+		return v
+	}
+
+	// go multiple times over different sizes of input data
+	for inputSize := 1; inputSize < 10; inputSize++ {
+		for iteration := 1; iteration < 100; iteration++ {
+			input := generateInputPool(inputSize)
+			t.Logf("Running with input: %v", input)
+
+			m := hashmap.New(hashFunc)
+
+			for key, value := range input {
+				t.Logf("Inserting key: %v", key)
+				m.Insert(key, value)
+			}
+
+			for key, value := range input {
+				v, found := m.Get(key)
+				if !found {
+					t.Errorf("Key not found: %v", key)
+				} else if v == nil {
+					t.Errorf("Key %v has a nil value", key)
+				} else if v.(int64) != value {
+					t.Errorf("Key %v has wrong value. Expected %v, Got %v", key, value, v)
+				}
+			}
+
+			keys := getShuffledKeys(input)
+			t.Logf("Shuffled keys: %v", keys)
+			removedKeys := []int64{}
+			for len(keys) > 0 {
+				k := keys[0]
+				keys = keys[1:]
+
+				t.Logf("Removing key: %v", k)
+				if found := m.Remove(k); !found {
+					t.Errorf("Unable to find and remove key: %v", k)
+				}
+				t.Logf("Removed key: %v", k)
+				removedKeys = append(removedKeys, k)
+
+				for _, k := range removedKeys {
+					if _, found := m.Get(k); found {
+						t.Errorf("Key %v found when it shouldn't have been!", k)
+					}
+
+					// removing a missing key must report it and leave the rest alone
+					if found := m.Remove(k); found {
+						t.Errorf("Key %v removed twice!", k)
+					}
+				}
+
+				for _, k := range keys {
+					v, found := m.Get(k)
+					if !found {
+						t.Errorf("Key %v not found!", k)
+					} else if v != input[k] {
+						t.Errorf("Key %v has wrong value. Expected %v, Got %v", k, input[k], v)
+					}
+				}
+			}
+		}
+	}
+}
+
+// generateInputPool generates a map with randomized keys and values.
+func generateInputPool(size int) map[int64]int64 {
+	r := map[int64]int64{}
+	values := rand.Perm(size * 4)
+
+	for index, key := range rand.Perm(size * 4) {
+		if index%4 == 0 {
+			r[int64(key)] = int64(values[index])
+		}
+	}
+
+	return r
+}
+
+// getShuffledKeys returns the keys of input in random order.
+func getShuffledKeys(input map[int64]int64) []int64 {
+	keys := []int64{}
+
+	for key := range input {
+		keys = append(keys, key)
+	}
+
+	order := rand.Perm(len(keys))
+	result := []int64{}
+	for _, k := range order {
+		result = append(result, keys[k])
+	}
+
+	return result
+}