From d8a690cf1bc3c3616d43f81d3a9df0e8a917b042 Mon Sep 17 00:00:00 2001 From: John Wellbelove Date: Sat, 29 Aug 2015 16:26:20 +0100 Subject: [PATCH] Added Pearson hash --- pearson.cpp | 57 ++++++++++++++ pearson.h | 176 ++++++++++++++++++++++++++++++++++++++++++ test/test_pearson.cpp | 152 ++++++++++++++++++++++++++++++++++++ 3 files changed, 385 insertions(+) create mode 100644 pearson.cpp create mode 100644 pearson.h create mode 100644 test/test_pearson.cpp diff --git a/pearson.cpp b/pearson.cpp new file mode 100644 index 00000000..c02ee85d --- /dev/null +++ b/pearson.cpp @@ -0,0 +1,57 @@ +///\file + +/****************************************************************************** +The MIT License(MIT) + +Embedded Template Library. +https://github.com/ETLCPP/etl + +Copyright(c) 2015 jwellbelove + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+******************************************************************************/ + +#include + +namespace etl +{ + //*************************************************************************** + /// Pearson lookup table + /// \ingroup pearson + //*************************************************************************** + extern const uint8_t PEARSON_LOOKUP[] = + { + 228, 39, 61, 95, 227, 187, 0, 197, 31, 189, 161, 222, 34, 15, 221, 246, + 19, 234, 6, 50, 113, 3, 91, 63, 77, 245, 144, 2, 183, 196, 25, 226, + 97, 126, 48, 59, 217, 4, 100, 145, 12, 88, 203, 149, 80, 154, 38, 27, + 224, 218, 158, 115, 202, 79, 53, 83, 242, 36, 139, 131, 136, 191, 42, 170, + 23, 99, 156, 51, 143, 60, 233, 206, 62, 108, 17, 67, 81, 71, 93, 195, + 26, 231, 247, 96, 24, 200, 176, 209, 152, 212, 138, 165, 75, 185, 130, 248, + 125, 110, 10, 116, 201, 90, 69, 204, 85, 251, 78, 157, 47, 184, 169, 141, + 134, 230, 89, 21, 146, 46, 55, 128, 148, 207, 216, 11, 114, 199, 103, 102, + 166, 244, 5, 104, 225, 160, 132, 28, 172, 65, 121, 140, 153, 119, 198, 210, + 58, 87, 117, 177, 33, 22, 13, 37, 49, 174, 109, 40, 73, 211, 18, 167, + 164, 252, 168, 74, 30, 173, 35, 98, 66, 193, 94, 175, 86, 54, 179, 122, + 220, 151, 192, 29, 133, 254, 155, 127, 240, 232, 190, 180, 8, 68, 236, 20, + 137, 92, 219, 208, 52, 250, 147, 142, 111, 112, 120, 45, 135, 255, 123, 229, + 57, 182, 243, 124, 186, 253, 7, 237, 9, 16, 70, 171, 235, 107, 223, 118, + 215, 178, 194, 181, 43, 188, 106, 105, 64, 241, 84, 238, 159, 44, 32, 76, + 213, 163, 150, 101, 129, 14, 249, 205, 214, 1, 41, 56, 162, 72, 239, 82 + } ; +} diff --git a/pearson.h b/pearson.h new file mode 100644 index 00000000..9ad04794 --- /dev/null +++ b/pearson.h @@ -0,0 +1,176 @@ +///\file + +/****************************************************************************** +The MIT License(MIT) + +Embedded Template Library. 
+https://github.com/ETLCPP/etl + +Copyright(c) 2014 jwellbelove + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +******************************************************************************/ + +#ifndef __ETL_PEARSON__ +#define __ETL_PEARSON__ + +#include + +#include "static_assert.h" +#include "type_traits.h" +#include "endian.h" +#include "ihash.h" +#include "array.h" + +#if defined(COMPILER_KEIL) +#pragma diag_suppress 1300 +#endif + +///\defgroup pearson Pearson hash calculation +///\ingroup pearson + +namespace etl +{ + //*************************************************************************** + /// Pearson lookup table + /// \ingroup pearson + //*************************************************************************** + extern const uint8_t PEARSON_LOOKUP[]; + + //*************************************************************************** + /// Calculates a Pearson hash + ///\tparam HASH_LENGTH The number of elements in the hash. 
+ ///\tparam ENDIANNESS The endianness of the calculation for input types larger than uint8_t. Default = endian::little. + /// \ingroup pearson + //*************************************************************************** + template + class pearson : public etl::ihash + { + public: + + typedef etl::array value_type; + + //************************************************************************* + /// Default constructor. + //************************************************************************* + pearson() + : first(true), + ihash(etl::endian(ENDIANNESS)) + { + reset(); + } + + //************************************************************************* + /// Constructor from range. + /// \param begin Start of the range. + /// \param end End of the range. + //************************************************************************* + template + pearson(TIterator begin, const TIterator end) + : first(true), + ihash(etl::endian(ENDIANNESS)) + { + reset(); + add(begin, end); + } + + //************************************************************************* + /// Resets the hash to the initial state. + //************************************************************************* + void reset() + { + hash.fill(0); + } + + //************************************************************************* + /// Adds a range. + /// \param begin + /// \param end + //************************************************************************* + template + void add(TIterator begin, const TIterator end) + { + ihash::add(begin, end); + } + + //************************************************************************* + /// Adds a value. + /// \param value The value to add to the hash. + //************************************************************************* + template + void add(TValue value) + { + ihash::add(value); + } + + //************************************************************************* + /// \param value The char to add to the hash. 
+ //************************************************************************* + void add(uint8_t value) + { + if (first) + { + for (size_t i = 0; i < HASH_SIZE; ++i) + { + hash[i] = PEARSON_LOOKUP[(uint32_t(value) + i) % 256]; + } + + first = false; + } + else + { + for (size_t i = 0; i < HASH_SIZE; ++i) + { + hash[i] = PEARSON_LOOKUP[hash[i] ^ value]; + } + } + } + + //************************************************************************* + /// Gets the hash value. + //************************************************************************* + value_type value() const + { + return hash; + } + + //************************************************************************* + /// Conversion operator to value_type. + //************************************************************************* + operator value_type () const + { + return value(); + } + + //************************************************************************* + /// Gets the generic digest value. + //************************************************************************* + generic_digest digest() + { + return generic_digest(&hash[0], &hash[HASH_LENGTH]); + } + + private: + + bool first; + value_type hash; + }; +} + +#endif diff --git a/test/test_pearson.cpp b/test/test_pearson.cpp new file mode 100644 index 00000000..525264f0 --- /dev/null +++ b/test/test_pearson.cpp @@ -0,0 +1,152 @@ +/****************************************************************************** +The MIT License(MIT) + +Embedded Template Library. 
+https://github.com/ETLCPP/etl + +Copyright(c) 2015 jwellbelove + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +******************************************************************************/ + +#include + +#include +#include +#include +#include +#include + +#include "../pearson.h" +#include "../endian.h" + +const size_t HASH_SIZE = 8; +typedef etl::pearson::value_type hash_t; + +//*************************************************************************** +/// Pearson lookup table +/// \ingroup pearson +//*************************************************************************** +namespace etl +{ + extern const uint8_t PEARSON_LOOKUP[]; +} + +//*************************************************************************** +// Comparison calculator based on Wikipedia example. 
https://en.wikipedia.org/wiki/Pearson_hashing
+// Computes each of the HASH_SIZE output bytes independently: output j is
+// seeded from (first byte + j) and then folded with every remaining byte.
+// Intended for containers of byte sized elements; an empty container yields
+// the all-zero hash.
+//***************************************************************************
+// NOTE(review): the template parameter list was missing and has been
+// reconstructed from the use of TContainer in the signature.
+template <typename TContainer>
+hash_t Pearson_Compare(const TContainer& data)
+{
+  hash_t hash;
+  hash.fill(0);
+
+  // Nothing to hash: return the zeroed value instead of reading data[0].
+  if (data.size() == 0)
+  {
+    return hash;
+  }
+
+  for (size_t j = 0; j < HASH_SIZE; ++j)
+  {
+    // Convert to uint8_t first so that a negative 'char' cannot produce an
+    // out of range table index.
+    uint8_t subhash = etl::PEARSON_LOOKUP[(static_cast<uint8_t>(data[0]) + j) % 256];
+
+    for (size_t i = 1; i < data.size(); ++i)
+    {
+      subhash = etl::PEARSON_LOOKUP[subhash ^ static_cast<uint8_t>(data[i])];
+    }
+
+    hash[j] = subhash;
+  }
+
+  return hash;
+}
+
+//***************************************************************************
+// Output stream for hash_t
+// Writes each element as a decimal integer followed by a space.
+// NOTE(review): this overload lives in the global namespace while hash_t is an
+// etl type; compilers that apply two-phase lookup may not find it from inside
+// the test framework's templates - confirm on the target toolchains.
+//***************************************************************************
+std::ostream& operator <<(std::ostream& os, const hash_t& hash)
+{
+  for (size_t i = 0; i < hash.size(); ++i)
+  {
+    os << int(hash[i]) << " ";
+  }
+
+  return os;
+}
+
+// NOTE(review): the template argument lists on etl::pearson and std::vector in
+// the tests below were missing and have been reconstructed; confirm them
+// against the original test file.
+namespace
+{
+  SUITE(test_pearson)
+  {
+    //*************************************************************************
+    // The range constructor must match the reference calculation.
+    //*************************************************************************
+    TEST(test_pearson_constructor)
+    {
+      std::string data("123456789");
+
+      hash_t compare = Pearson_Compare(data);
+      hash_t hash    = etl::pearson<HASH_SIZE>(data.begin(), data.end());
+
+      CHECK_EQUAL(compare, hash);
+    }
+
+    //*************************************************************************
+    // Adding the characters one at a time must match the reference calculation.
+    //*************************************************************************
+    TEST(test_pearson_add_values)
+    {
+      std::string data("123456789");
+
+      etl::pearson<HASH_SIZE> pearson_calculator;
+
+      for (size_t i = 0; i < data.size(); ++i)
+      {
+        pearson_calculator += data[i];
+      }
+
+      hash_t compare = Pearson_Compare(data);
+      hash_t hash    = pearson_calculator;
+
+      CHECK_EQUAL(compare, hash);
+    }
+
+    //*************************************************************************
+    // Adding the whole range in one call must match the reference calculation.
+    //*************************************************************************
+    TEST(test_pearson_add_range)
+    {
+      std::string data("123456789");
+
+      etl::pearson<HASH_SIZE> pearson_calculator;
+
+      pearson_calculator.add(data.begin(), data.end());
+
+      hash_t compare = Pearson_Compare(data);
+      hash_t hash    = pearson_calculator.value();
+
+      CHECK_EQUAL(compare, hash);
+    }
+
+    //*************************************************************************
+    // The same byte sequence supplied as bytes, as little endian words and as
+    // big endian words must produce the same hash.
+    //*************************************************************************
+    TEST(test_pearson_add_range_endian)
+    {
+      std::vector<uint8_t>  data1 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
+      std::vector<uint32_t> data2 = { 0x04030201, 0x08070605 };
+      std::vector<uint32_t> data3 = { 0x01020304, 0x05060708 };
+
+      hash_t hash1 = etl::pearson<HASH_SIZE>(data1.begin(), data1.end());
+      hash_t hash2 = etl::pearson<HASH_SIZE, etl::endian::little>(data2.begin(), data2.end());
+      CHECK_EQUAL(hash1, hash2);
+
+      hash_t hash3 = etl::pearson<HASH_SIZE, etl::endian::big>(data3.begin(), data3.end());
+      CHECK_EQUAL(hash1, hash3);
+    }
+  };
+}
+