| // The MIT License (MIT) |
| // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt |
| |
| // Permission is hereby granted, free of charge, to any person obtaining a copy of |
| // this software and associated documentation files (the "Software"), to deal in |
| // the Software without restriction, including without limitation the rights to |
| // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |
| // the Software, and to permit persons to whom the Software is furnished to do so, |
| // subject to the following conditions: |
| |
| // The above copyright notice and this permission notice shall be included in all |
| // copies or substantial portions of the Software. |
| |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
| // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
| // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
| // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| |
| package z |
| |
| import ( |
| "bytes" |
| "encoding/json" |
| "log" |
| "math" |
| "unsafe" |
| ) |
| |
| // helper |
| var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128} |
| |
| func getSize(ui64 uint64) (size uint64, exponent uint64) { |
| if ui64 < uint64(512) { |
| ui64 = uint64(512) |
| } |
| size = uint64(1) |
| for size < ui64 { |
| size <<= 1 |
| exponent++ |
| } |
| return size, exponent |
| } |
| |
| func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) { |
| size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2) |
| locs := math.Ceil(float64(0.69314718056) * size / numEntries) |
| return uint64(size), uint64(locs) |
| } |
| |
| // NewBloomFilter returns a new bloomfilter. |
| func NewBloomFilter(params ...float64) (bloomfilter *Bloom) { |
| var entries, locs uint64 |
| if len(params) == 2 { |
| if params[1] < 1 { |
| entries, locs = calcSizeByWrongPositives(params[0], params[1]) |
| } else { |
| entries, locs = uint64(params[0]), uint64(params[1]) |
| } |
| } else { |
| log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" + |
| " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," + |
| " float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))") |
| } |
| size, exponent := getSize(entries) |
| bloomfilter = &Bloom{ |
| sizeExp: exponent, |
| size: size - 1, |
| setLocs: locs, |
| shift: 64 - exponent, |
| } |
| bloomfilter.Size(size) |
| return bloomfilter |
| } |
| |
| // Bloom filter |
| type Bloom struct { |
| bitset []uint64 |
| ElemNum uint64 |
| sizeExp uint64 |
| size uint64 |
| setLocs uint64 |
| shift uint64 |
| } |
| |
| // <--- http://www.cse.yorku.ca/~oz/hash.html |
| // modified Berkeley DB Hash (32bit) |
| // hash is casted to l, h = 16bit fragments |
| // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) { |
| // hash := uint64(len(*b)) |
| // for _, c := range *b { |
| // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash |
| // } |
| // h = hash >> bl.shift |
| // l = hash << bl.shift >> bl.shift |
| // return l, h |
| // } |
| |
| // Add adds hash of a key to the bloomfilter. |
| func (bl *Bloom) Add(hash uint64) { |
| h := hash >> bl.shift |
| l := hash << bl.shift >> bl.shift |
| for i := uint64(0); i < bl.setLocs; i++ { |
| bl.Set((h + i*l) & bl.size) |
| bl.ElemNum++ |
| } |
| } |
| |
| // Has checks if bit(s) for entry hash is/are set, |
| // returns true if the hash was added to the Bloom Filter. |
| func (bl Bloom) Has(hash uint64) bool { |
| h := hash >> bl.shift |
| l := hash << bl.shift >> bl.shift |
| for i := uint64(0); i < bl.setLocs; i++ { |
| if !bl.IsSet((h + i*l) & bl.size) { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // AddIfNotHas only Adds hash, if it's not present in the bloomfilter. |
| // Returns true if hash was added. |
| // Returns false if hash was already registered in the bloomfilter. |
| func (bl *Bloom) AddIfNotHas(hash uint64) bool { |
| if bl.Has(hash) { |
| return false |
| } |
| bl.Add(hash) |
| return true |
| } |
| |
| // TotalSize returns the total size of the bloom filter. |
| func (bl *Bloom) TotalSize() int { |
| // The bl struct has 5 members and each one is 8 byte. The bitset is a |
| // uint64 byte slice. |
| return len(bl.bitset)*8 + 5*8 |
| } |
| |
| // Size makes Bloom filter with as bitset of size sz. |
| func (bl *Bloom) Size(sz uint64) { |
| bl.bitset = make([]uint64, sz>>6) |
| } |
| |
| // Clear resets the Bloom filter. |
| func (bl *Bloom) Clear() { |
| for i := range bl.bitset { |
| bl.bitset[i] = 0 |
| } |
| } |
| |
| // Set sets the bit[idx] of bitset. |
| func (bl *Bloom) Set(idx uint64) { |
| ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) |
| *(*uint8)(ptr) |= mask[idx%8] |
| } |
| |
| // IsSet checks if bit[idx] of bitset is set, returns true/false. |
| func (bl *Bloom) IsSet(idx uint64) bool { |
| ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) |
| r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1 |
| return r == 1 |
| } |
| |
| // bloomJSONImExport |
| // Im/Export structure used by JSONMarshal / JSONUnmarshal |
| type bloomJSONImExport struct { |
| FilterSet []byte |
| SetLocs uint64 |
| } |
| |
| // NewWithBoolset takes a []byte slice and number of locs per entry, |
| // returns the bloomfilter with a bitset populated according to the input []byte. |
| func newWithBoolset(bs *[]byte, locs uint64) *Bloom { |
| bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs)) |
| for i, b := range *bs { |
| *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b |
| } |
| return bloomfilter |
| } |
| |
| // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes |
| // returns bloom32 / bloom64 object. |
| func JSONUnmarshal(dbData []byte) (*Bloom, error) { |
| bloomImEx := bloomJSONImExport{} |
| if err := json.Unmarshal(dbData, &bloomImEx); err != nil { |
| return nil, err |
| } |
| buf := bytes.NewBuffer(bloomImEx.FilterSet) |
| bs := buf.Bytes() |
| bf := newWithBoolset(&bs, bloomImEx.SetLocs) |
| return bf, nil |
| } |
| |
| // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte. |
| func (bl Bloom) JSONMarshal() []byte { |
| bloomImEx := bloomJSONImExport{} |
| bloomImEx.SetLocs = bl.setLocs |
| bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3) |
| for i := range bloomImEx.FilterSet { |
| bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) + |
| uintptr(i))) |
| } |
| data, err := json.Marshal(bloomImEx) |
| if err != nil { |
| log.Fatal("json.Marshal failed: ", err) |
| } |
| return data |
| } |