diff --git a/hashtable.test b/hashtable.test new file mode 100755 index 0000000..c068489 Binary files /dev/null and b/hashtable.test differ diff --git a/hashtable/hashtable_test.go b/hashtable/hashtable_test.go index 1d2d6ab..8568367 100644 --- a/hashtable/hashtable_test.go +++ b/hashtable/hashtable_test.go @@ -4,6 +4,7 @@ import "testing" import ( "encoding/binary" + "encoding/hex" "math/rand" "os" ) @@ -39,6 +40,20 @@ func randstr(length int) String { panic("unreachable") } +func randhex(length int) String { + if urandom, err := os.Open("/dev/urandom"); err != nil { + panic(err) + } else { + slice := make([]byte, length/2) + if _, err := urandom.Read(slice); err != nil { + panic(err) + } + urandom.Close() + return String(hex.EncodeToString(slice)) + } + panic("unreachable") +} + func TestMake(t *testing.T) { NewHashTable(12) } @@ -70,8 +85,8 @@ func TestPutHasGetRemove(t *testing.T) { ranrec := func() *record { return &record{ - String(randstr(20)), - String(randstr(20)), + String(randhex(12)), + String(randhex(12)), } } @@ -80,54 +95,57 @@ func TestPutHasGetRemove(t *testing.T) { for i := range records { r := ranrec() records[i] = r + if table.Has(r.key) { + t.Fatal("Table has extra key", table, r.key) + } err := table.Put(r.key, String("")) if err != nil { - t.Error(err) + t.Fatal(err) } err = table.Put(r.key, r.value) if err != nil { - t.Error(err) + t.Fatal(err) } if table.Size() != (i + 1) { - t.Error("size was wrong", table.Size(), i+1) + t.Fatal("size was wrong", table.Size(), i+1) } } for _, r := range records { if has := table.Has(r.key); !has { - t.Error(table, "Missing key") + t.Fatal(table, "Missing key", r, r.key.Hash()) } - if has := table.Has(randstr(12)); has { - t.Error("Table has extra key") + if has := table.Has(randhex(12)); has { + t.Fatal("Table has extra key") } if val, err := table.Get(r.key); err != nil { - t.Error(err) + t.Fatal(err) } else if !(val.(String)).Equals(r.value) { - t.Error("wrong value") + t.Fatal("wrong value") } } for i, x := range records { if val, err := table.Remove(x.key); err != nil { - t.Error(err) + t.Fatal(err) } else if !(val.(String)).Equals(x.value) { - t.Error("wrong value") + t.Fatal("wrong value") } for _, r := range records[i+1:] { if has := table.Has(r.key); !has { - t.Error("Missing key") + t.Fatal("Missing key") } - if has := table.Has(randstr(12)); has { - t.Error("Table has extra key") + if has := table.Has(randhex(12)); has { + t.Fatal("Table has extra key") } if val, err := table.Get(r.key); err != nil { - t.Error(err) + t.Fatal(err) } else if !(val.(String)).Equals(r.value) { - t.Error("wrong value") + t.Fatal("wrong value") } } if table.Size() != (len(records) - (i + 1)) { - t.Error("size was wrong", table.Size(), (len(records) - (i + 1))) + t.Fatal("size was wrong", table.Size(), (len(records) - (i + 1))) } } } @@ -275,3 +293,38 @@ func BenchmarkMLHash(b *testing.B) { } } } + +func BenchmarkMLHashBetter(b *testing.B) { + b.StopTimer() + + type record struct { + key String + value String + } + + records := make([]*record, 100) + + ranrec := func() *record { + return &record{randstr(20), randstr(20)} + } + + for i := range records { + records[i] = ranrec() + } + + b.StartTimer() + for i := 0; i < b.N; i++ { + t := NewLinearHash() + for _, r := range records { + t.Put(r.key, r.value) + } + for _, _, next := t.Iterate()(); next != nil; _, _, next = next() {} + for _, next := t.Keys()(); next != nil; _, next = next() {} + for _, next := t.Values()(); next != nil; _, next = next() {} + for _, next := t.Values()(); next != nil; _, next = next() {} + for _, next := t.Values()(); next != nil; _, next = next() {} + for _, r := range records { + t.Remove(r.key) + } + } +} diff --git a/hashtable/linhash.go b/hashtable/linhash.go index 469f6f3..721b586 100644 --- a/hashtable/linhash.go +++ b/hashtable/linhash.go @@ -1,8 +1,14 @@ package hashtable import ( - "github.com/timtadh/data-structures/tree/avl" - . "github.com/timtadh/data-structures/types" + "fmt" + "strings" +) + +import ( + "github.com/timtadh/data-structures/errors" + "github.com/timtadh/data-structures/list" + "github.com/timtadh/data-structures/types" ) const ( @@ -12,14 +18,14 @@ const ( type bst struct { hash int - key Hashable + key types.Hashable value interface{} left *bst right *bst } type LinearHash struct { - table []*avl.AvlNode + table []*list.List n uint r uint i uint @@ -28,15 +34,19 @@ type LinearHash struct { func NewLinearHash() *LinearHash { N := uint(32) I := uint(5) - return &LinearHash{ - table: make([]*avl.AvlNode, N), + h := &LinearHash{ + table: make([]*list.List, N), n: N, r: 0, i: I, } + for i := range h.table { + h.table[i] = list.New(RECORDS_PER_BLOCK) + } + return h } -func (self *LinearHash) bucket(key Hashable) uint { +func (self *LinearHash) bucket(key types.Hashable) uint { m := uint(key.Hash() & ((1 << self.i) - 1)) if m < self.n { return m @@ -49,12 +59,22 @@ func (self *LinearHash) Size() int { return int(self.r) } -func (self *LinearHash) Put(key Hashable, value interface{}) (err error) { - var updated bool +func (self *LinearHash) Put(key types.Hashable, value interface{}) (err error) { + e := &types.MapEntry{key, value} bkt_idx := self.bucket(key) - self.table[bkt_idx], updated = self.table[bkt_idx].Put(key, value) - if !updated { - self.r += 1 + bkt := self.table[bkt_idx] + i, has, err := list.Find(bkt, e) + if err != nil { + return err + } else if !has { + bkt.Insert(i, e) + self.r++ + } else { + if item, err := bkt.Get(i); err != nil { + return err + } else { + item.(*types.MapEntry).Value = value + } } if float64(self.r) > UTILIZATION*float64(self.n)*float64(RECORDS_PER_BLOCK) { return self.split() @@ -62,38 +82,67 @@ func (self *LinearHash) Put(key Hashable, value interface{}) (err error) { return nil } -func (self *LinearHash) Get(key Hashable) (value interface{}, err error) { +func (self *LinearHash) Get(key types.Hashable) (value interface{}, err error) { bkt_idx := self.bucket(key) - return self.table[bkt_idx].Get(key) + bkt := self.table[bkt_idx] + i, has, err := list.Find(bkt, key) + if err != nil { + return nil, err + } else if !has { + return nil, errors.Errorf("Key not found! '%v'", key) + } + item, err := bkt.Get(i) + if err != nil { + return nil, err + } + return item.(*types.MapEntry).Value, nil } -func (self *LinearHash) Has(key Hashable) bool { +func (self *LinearHash) Has(key types.Hashable) bool { bkt_idx := self.bucket(key) - return self.table[bkt_idx].Has(key) + bkt := self.table[bkt_idx] + _, has, _ := list.Find(bkt, key) + return has } -func (self *LinearHash) Remove(key Hashable) (value interface{}, err error) { +func (self *LinearHash) Remove(key types.Hashable) (value interface{}, err error) { bkt_idx := self.bucket(key) - self.table[bkt_idx], value, err = self.table[bkt_idx].Remove(key) - if err == nil { - self.r -= 1 + bkt := self.table[bkt_idx] + i, has, err := list.Find(bkt, key) + if err != nil { + return nil, err + } else if !has { + return nil, errors.Errorf("Key not found! '%v'", key) } - return + item, err := bkt.Get(i) + if err != nil { + return nil, err + } + err = bkt.Remove(i) + if err != nil { + return nil, err + } + if !item.(*types.MapEntry).Key.Equals(key) { + return nil, errors.Errorf("assert fail: %v != %v", key, item) + } + self.r-- + return item.(*types.MapEntry).Value, nil } func (self *LinearHash) split() (err error) { bkt_idx := self.n % (1 << (self.i - 1)) old_bkt := self.table[bkt_idx] - var bkt_a, bkt_b *avl.AvlNode + bkt_a := list.New(RECORDS_PER_BLOCK) + bkt_b := list.New(RECORDS_PER_BLOCK) self.n += 1 if self.n > (1 << self.i) { self.i += 1 } - for key, value, next := old_bkt.Iterate()(); next != nil; key, value, next = next() { - if self.bucket(key.(Hashable)) == bkt_idx { - bkt_a, _ = bkt_a.Put(key.(Hashable), value) + for item, next := old_bkt.Items()(); next != nil; item, next = next() { + if self.bucket(item) == bkt_idx { + bkt_a.Append(item) } else { - bkt_b, _ = bkt_b.Put(key.(Hashable), value) + bkt_b.Append(item) } } self.table[bkt_idx] = bkt_a @@ -101,33 +150,46 @@ func (self *LinearHash) split() (err error) { return nil } -func (self *LinearHash) Iterate() KVIterator { +func (self *LinearHash) Iterate() (kvi types.KVIterator) { table := self.table i := 0 - iter := table[i].Iterate() - var kv_iterator KVIterator - kv_iterator = func() (key Hashable, val interface{}, next KVIterator) { - key, val, iter = iter() + iter := table[i].Items() + kvi = func() (key types.Hashable, val interface{}, _ types.KVIterator) { + var item types.Hashable + item, iter = iter() for iter == nil { i++ if i >= len(table) { return nil, nil, nil } - key, val, iter = table[i].Iterate()() + item, iter = table[i].Items()() } - return key, val, kv_iterator + e := item.(*types.MapEntry) + return e.Key, e.Value, kvi } - return kv_iterator + return kvi +} + +func (self *LinearHash) Items() (vi types.KIterator) { + return types.MakeItemsIterator(self) } -func (self *LinearHash) Items() (vi KIterator) { - return MakeItemsIterator(self) +func (self *LinearHash) Keys() types.KIterator { + return types.MakeKeysIterator(self) } -func (self *LinearHash) Keys() KIterator { - return MakeKeysIterator(self) +func (self *LinearHash) Values() types.Iterator { + return types.MakeValuesIterator(self) } -func (self *LinearHash) Values() Iterator { - return MakeValuesIterator(self) +func (self *LinearHash) String() string { + if self.Size() <= 0 { + return "{}" + } + items := make([]string, 0, self.Size()) + for item, next := self.Items()(); next != nil; item, next = next() { + items = append(items, fmt.Sprintf("%v", item)) + } + return "{" + strings.Join(items, ", ") + "}" } + diff --git a/list/sorted.go b/list/sorted.go index 1498fd2..27dd0eb 100644 --- a/list/sorted.go +++ b/list/sorted.go @@ -155,7 +155,6 @@ func (s *Sorted) Delete(item types.Hashable) (err error) { return errors.Errorf("item %v not in the table", item) } return s.list.Remove(i) - return nil } func (s *Sorted) Equals(b types.Equatable) bool { @@ -179,30 +178,33 @@ func (s *Sorted) String() string { } func (s *Sorted) Find(item types.Hashable) (int, bool, error) { + return Find(&s.list, item) +} +func Find(lst *List, item types.Hashable) (int, bool, error) { var l int = 0 - var r int = s.Size() - 1 + var r int = lst.Size() - 1 var m int for l <= r { m = ((r - l) >> 1) + l - im, err := s.list.Get(m) + im, err := lst.Get(m) if err != nil { return -1, false, err } - if item.Less(im) { - r = m - 1 - } else if item.Equals(im) { + if im.Less(item) { + l = m + 1 + } else if im.Equals(item) { for j := m; j > 0; j-- { - ij_1, err := s.list.Get(j - 1) + ij_1, err := lst.Get(j - 1) if err != nil { return -1, false, err } - if !item.Equals(ij_1) { + if !ij_1.Equals(item) { return j, true, nil } } return 0, true, nil } else { - l = m + 1 + r = m - 1 } } return l, false, nil diff --git a/types/map_entry.go b/types/map_entry.go index 2c15673..41220da 100644 --- a/types/map_entry.go +++ b/types/map_entry.go @@ -30,5 +30,5 @@ func (m *MapEntry) Hash() int { } func (m *MapEntry) String() string { - return fmt.Sprintf("", m.Key, m.Value) + return fmt.Sprintf("%v: %v", m.Key, m.Value) } diff --git a/types/string.go b/types/string.go index d58393b..afecf75 100644 --- a/types/string.go +++ b/types/string.go @@ -2,6 +2,7 @@ package types import ( "bytes" + "fmt" "hash/fnv" ) @@ -39,6 +40,10 @@ func (self String) Hash() int { return int(h.Sum32()) } +func (self String) String() string { + return fmt.Sprintf(`"%v"`, string(self)) +} + func (self *ByteSlice) MarshalBinary() ([]byte, error) { return []byte(*self), nil }