From 79e1d54e4c5da5c8aac758d3a1484800234dbd6a Mon Sep 17 00:00:00 2001
From: Jeff Becker
Date: Fri, 9 Dec 2016 09:08:03 -0500
Subject: [PATCH] implement simple bloom filter

---
 BloomFilter.cpp      | 69 ++++++++++++++++++++++++++++++++++++++++++++
 BloomFilter.h        | 31 ++++++++++++++++++++
 build/CMakeLists.txt |  1 +
 filelist.mk          |  2 +-
 4 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 BloomFilter.cpp
 create mode 100644 BloomFilter.h

diff --git a/BloomFilter.cpp b/BloomFilter.cpp
new file mode 100644
index 00000000..eac931db
--- /dev/null
+++ b/BloomFilter.cpp
@@ -0,0 +1,69 @@
+#include "BloomFilter.h"
+#include "I2PEndian.h"
+#include <openssl/sha.h>
+#include <algorithm>
+#include <vector>
+
+namespace i2p
+{
+namespace util
+{
+
+  /**
+   * @brief decaying bloom filter implementation
+   *
+   * Each entry is hashed with SHA256 and mapped to one bit of a byte buffer;
+   * Decay() forgets every entry at once by zeroing that buffer.
+   */
+  class DecayingBloomFilter : public IBloomFilter
+  {
+  public:
+
+    /** @param size buffer size in bytes; 0 is raised to 1 so the modulo in Get() stays defined */
+    explicit DecayingBloomFilter(const std::size_t size)
+      : m_Data(std::max<std::size_t>(size, 1)) // value-initialized (zeroed) so a fresh filter reports no hits
+    {
+    }
+
+    /** @brief implements IBloomFilter::Add, returns false when the entry's bit was already set */
+    bool Add(const uint8_t * data, std::size_t len) override
+    {
+      std::size_t idx;
+      uint8_t mask;
+      Get(data, len, idx, mask);
+      if(m_Data[idx] & mask) return false; // filter hit
+      m_Data[idx] |= mask;
+      return true;
+    }
+
+    /** @brief implements IBloomFilter::Decay */
+    void Decay() override
+    {
+      // reset bloom filter buffer
+      std::fill(m_Data.begin(), m_Data.end(), 0);
+    }
+
+  private:
+    /** @brief get byte index and bit mask for data */
+    void Get(const uint8_t * data, std::size_t len, std::size_t & idx, uint8_t & bm) const
+    {
+      uint8_t digest[32];
+      // TODO: use blake2 because it's faster
+      SHA256(data, len, digest);
+      const uint64_t i = buf64toh(digest);
+      idx = i % m_Data.size();
+      // take the bit from a digest byte that is not part of i: i % 8 equals idx % 8
+      // whenever the size is a multiple of 8, which would leave 7 of every 8 bits unused
+      bm = static_cast<uint8_t>(1u << (digest[8] % 8));
+    }
+
+    std::vector<uint8_t> m_Data;
+  };
+
+  /** @brief implements BloomFilter(): creates a DecayingBloomFilter with a buffer of capacity bytes */
+  BloomFilterPtr BloomFilter(std::size_t capacity)
+  {
+    return std::make_shared<DecayingBloomFilter>(capacity);
+  }
+}
+}
diff --git a/BloomFilter.h b/BloomFilter.h
new file mode 100644
index 00000000..7d4b28fa
--- 
/dev/null
+++ b/BloomFilter.h
@@ -0,0 +1,31 @@
+#ifndef BLOOM_FILTER_H_
+#define BLOOM_FILTER_H_
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace i2p
+{
+namespace util
+{
+
+  /** @brief interface for bloom filter, instances are obtained from BloomFilter() */
+  struct IBloomFilter
+  {
+    /** @brief destructor, defined inline so that implementations link without a separate definition */
+    virtual ~IBloomFilter() = default;
+    /** @brief add entry to bloom filter, return false if filter hit otherwise return true */
+    virtual bool Add(const uint8_t * data, std::size_t len) = 0;
+    /** @brief optionally decay old entries, does nothing unless an implementation overrides it */
+    virtual void Decay() {}
+  };
+
+  typedef std::shared_ptr<IBloomFilter> BloomFilterPtr;
+
+  /** @brief create bloom filter, capacity is the size of the filter's buffer in bytes */
+  BloomFilterPtr BloomFilter(std::size_t capacity = 1024 * 8);
+
+}
+}
+
+#endif
diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt
index 8b42c063..a1fd19c2 100644
--- a/build/CMakeLists.txt
+++ b/build/CMakeLists.txt
@@ -26,6 +26,7 @@ set ( CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
 set ( CMAKE_SOURCE_DIR ".." )
 
 set (LIBI2PD_SRC
+  "${CMAKE_SOURCE_DIR}/BloomFilter.cpp"
   "${CMAKE_SOURCE_DIR}/Config.cpp"
   "${CMAKE_SOURCE_DIR}/Crypto.cpp"
   "${CMAKE_SOURCE_DIR}/Garlic.cpp"
diff --git a/filelist.mk b/filelist.mk
index 94ce2f22..76f58785 100644
--- a/filelist.mk
+++ b/filelist.mk
@@ -1,5 +1,5 @@
 LIB_SRC = \
-  Gzip.cpp Crypto.cpp Datagram.cpp Garlic.cpp I2NPProtocol.cpp LeaseSet.cpp \
+  BloomFilter.cpp Gzip.cpp Crypto.cpp Datagram.cpp Garlic.cpp I2NPProtocol.cpp LeaseSet.cpp \
   Log.cpp NTCPSession.cpp NetDb.cpp NetDbRequests.cpp Profiling.cpp \
   Reseed.cpp RouterContext.cpp RouterInfo.cpp Signature.cpp SSU.cpp \
   SSUSession.cpp SSUData.cpp Streaming.cpp Identity.cpp TransitTunnel.cpp \