common/Util.cpp | 37 ++++++++++++++++--------- common/Util.hpp | 8 ++--- kit/Kit.cpp | 13 ++++++-- loolwsd.xml.in | 3 +- test/WhiteBoxTests.cpp | 72 +++++++++++++++++++++++++++++++++++-------------- wsd/LOOLWSD.cpp | 8 ++++- wsd/LOOLWSD.hpp | 6 ++-- 7 files changed, 103 insertions(+), 44 deletions(-)
New commits: commit 75b9ae0b41f26916833b67f5cd7e574be034de92 Author: Ashod Nakashian <ashod.nakash...@collabora.co.uk> AuthorDate: Sun Apr 14 12:21:19 2019 -0400 Commit: Ashod Nakashian <ashnak...@gmail.com> CommitDate: Sun Apr 14 20:48:07 2019 +0200 wsd: improved anonymization algorithm Better hashing algorithm based on FNV-1a. Adds support for salting the hash, and for providing salt via configuration. More unit-tests added, and better formatting. Change-Id: I2be42675d0cdbaa73c3d7faed99e07631a9c20fc Reviewed-on: https://gerrit.libreoffice.org/70034 Reviewed-by: Ashod Nakashian <ashnak...@gmail.com> Tested-by: Ashod Nakashian <ashnak...@gmail.com> diff --git a/common/Util.cpp b/common/Util.cpp index 048526a49..a27ed5a22 100644 --- a/common/Util.cpp +++ b/common/Util.cpp @@ -274,16 +274,16 @@ namespace Util return true; } - std::string encodeId(const unsigned number, const int padding) + std::string encodeId(const std::uint64_t number, const int padding) { std::ostringstream oss; oss << std::hex << std::setw(padding) << std::setfill('0') << number; return oss.str(); } - unsigned decodeId(const std::string& str) + std::uint64_t decodeId(const std::string& str) { - unsigned id = 0; + std::uint64_t id = 0; std::stringstream ss; ss << std::hex << str; ss >> id; @@ -651,7 +651,7 @@ namespace Util } static std::map<std::string, std::string> AnonymizedStrings; - static std::atomic<unsigned> AnonymizationSalt(0); + static std::atomic<unsigned> AnonymizationCounter(0); static std::mutex AnonymizedMutex; void mapAnonymized(const std::string& plain, const std::string& anonymized) @@ -666,7 +666,7 @@ namespace Util AnonymizedStrings[plain] = anonymized; } - std::string anonymize(const std::string& text) + std::string anonymize(const std::string& text, const std::uint64_t nAnonymizationSalt) { { std::unique_lock<std::mutex> lock(AnonymizedMutex); @@ -679,15 +679,26 @@ namespace Util } } - // We just need something irreversible, short, and - // quite simple. - std::size_t hash = 0; + // Modified 64-bit FNV-1a to add salting. + // For the algorithm and the magic numbers, see http://isthe.com/chongo/tech/comp/fnv/ + std::uint64_t hash = 0xCBF29CE484222325LL; + hash ^= nAnonymizationSalt; + hash *= 0x100000001b3ULL; for (const char c : text) - hash += c; + { + hash ^= static_cast<std::uint64_t>(c); + hash *= 0x100000001b3ULL; + } + + hash ^= nAnonymizationSalt; + hash *= 0x100000001b3ULL; // Generate the anonymized string. The '#' is to hint that it's anonymized. - // Prepend with salt to make it unique, in case we get collisions (which we will, eventually). - const std::string res = '#' + Util::encodeId(AnonymizationSalt++, 0) + '#' + Util::encodeId(hash, 0) + '#'; + // Prepend with count to make it unique within a single process instance, + // in case we get collisions (which we will, eventually). N.B.: Identical + // strings likely to have different prefixes when logged in WSD process vs. Kit. + const std::string res + = '#' + Util::encodeId(AnonymizationCounter++, 0) + '#' + Util::encodeId(hash, 0) + '#'; mapAnonymized(text, res); return res; } @@ -702,7 +713,7 @@ namespace Util return filename; } - std::string anonymizeUrl(const std::string& url) + std::string anonymizeUrl(const std::string& url, const std::uint64_t nAnonymizationSalt) { std::string base; std::string filename; @@ -710,7 +721,7 @@ namespace Util std::string params; std::tie(base, filename, ext, params) = Util::splitUrl(url); - return base + Util::anonymize(filename) + ext + params; + return base + Util::anonymize(filename, nAnonymizationSalt) + ext + params; } } diff --git a/common/Util.hpp b/common/Util.hpp index 07af2a6a4..5bf6d9d03 100644 --- a/common/Util.hpp +++ b/common/Util.hpp @@ -67,9 +67,9 @@ namespace Util /// Hex to unsigned char bool dataFromHexString(const std::string& hexString, std::vector<unsigned char>& data); /// Encode an integral ID into a string, with padding support. - std::string encodeId(const unsigned number, const int padding = 5); + std::string encodeId(const std::uint64_t number, const int padding = 5); /// Decode an integral ID from a string. - unsigned decodeId(const std::string& str); + std::uint64_t decodeId(const std::string& str); bool windowingAvailable(); @@ -713,14 +713,14 @@ int main(int argc, char**argv) /// Anonymize a sensitive string to avoid leaking it. /// Called on strings to be logged or exposed. - std::string anonymize(const std::string& text); + std::string anonymize(const std::string& text, const std::uint64_t nAnonymizationSalt); /// Sets the anonymized version of a given plain-text string. /// After this, 'anonymize(plain)' will return 'anonymized'. void mapAnonymized(const std::string& plain, const std::string& anonymized); /// Anonymize the basename of filenames only, preserving the path and extension. - std::string anonymizeUrl(const std::string& url); + std::string anonymizeUrl(const std::string& url, const std::uint64_t nAnonymizationSalt); /// Extract and return the filename given a url or path. std::string getFilenameFromURL(const std::string& url); diff --git a/kit/Kit.cpp b/kit/Kit.cpp index 0a91009e3..019a061cb 100644 --- a/kit/Kit.cpp +++ b/kit/Kit.cpp @@ -114,6 +114,7 @@ class Document; static std::shared_ptr<Document> document; #ifndef BUILDING_TESTS static bool AnonymizeUserData = false; +static uint64_t AnonymizationSalt = 82589933; static std::string ObfuscatedFileId; #endif @@ -2278,7 +2279,13 @@ void lokit_main( LOG_INF("Setting log-level to [trace] and delaying setting to configured [" << LogLevel << "] until after Kit initialization."); } - AnonymizeUserData = std::getenv("LOOL_ANONYMIZE_USER_DATA") != nullptr; + const char* pAnonymizationSalt = std::getenv("LOOL_ANONYMIZATION_SALT"); + if (pAnonymizationSalt) + { + AnonymizationSalt = std::stoull(std::string(pAnonymizationSalt)); + AnonymizeUserData = true; + } + LOG_INF("User-data anonymization is " << (AnonymizeUserData ? "enabled." : "disabled.")); assert(!childRoot.empty()); @@ -2587,7 +2594,7 @@ void lokit_main( std::string anonymizeUrl(const std::string& url) { #ifndef BUILDING_TESTS - return AnonymizeUserData ? Util::anonymizeUrl(url) : url; + return AnonymizeUserData ? Util::anonymizeUrl(url, AnonymizationSalt) : url; #else return url; #endif @@ -2669,7 +2676,7 @@ bool globalPreinit(const std::string &loTemplate) std::string anonymizeUsername(const std::string& username) { #ifndef BUILDING_TESTS - return AnonymizeUserData ? Util::anonymize(username) : username; + return AnonymizeUserData ? Util::anonymize(username, AnonymizationSalt) : username; #else return username; #endif diff --git a/loolwsd.xml.in b/loolwsd.xml.in index db99c5c41..cd8075d41 100644 --- a/loolwsd.xml.in +++ b/loolwsd.xml.in @@ -52,7 +52,8 @@ <property name="flush" desc="Enable/disable flushing after logging each line. May harm performance. Note that without flushing after each line, the log lines from the different processes will not appear in chronological order.">false</property> </file> <anonymize> - <anonymize_user_data type="bool" desc="Enable to anonymize/obfuscate of user data in logs. If default is true, it was forced at compile-time and cannot be disabled." default="@LOOLWSD_ANONYMIZE_USER_DATA@">@LOOLWSD_ANONYMIZE_USER_DATA@</anonymize_user_data> + <anonymize_user_data type="bool" desc="Enable to anonymize/obfuscate of user-data in logs. If default is true, it was forced at compile-time and cannot be disabled." default="@LOOLWSD_ANONYMIZE_USER_DATA@">@LOOLWSD_ANONYMIZE_USER_DATA@</anonymize_user_data> + <anonymization_salt type="uint" desc="The salt used to anonymize/obfuscate user-data in logs. Use a secret 64-bit random number." default="82589933">82589933</anonymization_salt> </anonymize> </logging> diff --git a/test/WhiteBoxTests.cpp b/test/WhiteBoxTests.cpp index 3552af21d..801402e87 100644 --- a/test/WhiteBoxTests.cpp +++ b/test/WhiteBoxTests.cpp @@ -675,35 +675,67 @@ void WhiteBoxTests::testJson() void WhiteBoxTests::testAnonymization() { static const std::string name = "some name with space"; - CPPUNIT_ASSERT_EQUAL(std::string("#0#77d#"), Util::anonymizeUrl(name)); + static const std::string filename = "filename.ext"; + static const std::string filenameTestx = "testx (6).odt"; + static const std::string path = "/path/to/filename.ext"; + static const std::string plainUrl + = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/" + "736_ocgdpzbkm39u?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&" + "permission=edit"; + static const std::string fileUrl = "http://localhost/owncloud/index.php/apps/richdocuments/" + "wopi/files/736_ocgdpzbkm39u/" + "secret.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&" + "access_token_ttl=0&permission=edit"; + + std::uint64_t nAnonymizationSalt = 1111111111182589933; + + CPPUNIT_ASSERT_EQUAL(std::string("#0#5e45aef91248a8aa#"), + Util::anonymizeUrl(name, nAnonymizationSalt)); + CPPUNIT_ASSERT_EQUAL(std::string("#1#8f8d95bd2a202d00#.odt"), + Util::anonymizeUrl(filenameTestx, nAnonymizationSalt)); + CPPUNIT_ASSERT_EQUAL(std::string("/path/to/#2#5c872b2d82ecc8a0#.ext"), + Util::anonymizeUrl(path, nAnonymizationSalt)); + CPPUNIT_ASSERT_EQUAL( + std::string("http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/" + "#3#22c6f0caad277666#?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_" + "token_ttl=0&permission=edit"), + Util::anonymizeUrl(plainUrl, nAnonymizationSalt)); + CPPUNIT_ASSERT_EQUAL( + std::string("http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/" + "736_ocgdpzbkm39u/" + "#4#294f0dfb18f6a80b#.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_" + "token_ttl=0&permission=edit"), + Util::anonymizeUrl(fileUrl, nAnonymizationSalt)); + + nAnonymizationSalt = 0; + + CPPUNIT_ASSERT_EQUAL(std::string("#0#5e45aef91248a8aa#"), Util::anonymizeUrl(name, nAnonymizationSalt)); Util::mapAnonymized(name, name); - CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name)); + CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name, nAnonymizationSalt)); - static const std::string filename = "filename.ext"; - CPPUNIT_ASSERT_EQUAL(std::string("#1#341#.ext"), Util::anonymizeUrl(filename)); - Util::mapAnonymized("filename", "filename"); - CPPUNIT_ASSERT_EQUAL(name, Util::anonymizeUrl(name)); + CPPUNIT_ASSERT_EQUAL(std::string("#2#5c872b2d82ecc8a0#.ext"), + Util::anonymizeUrl(filename, nAnonymizationSalt)); + Util::mapAnonymized("filename", "filename"); // Identity map of the filename without extension. + CPPUNIT_ASSERT_EQUAL(filename, Util::anonymizeUrl(filename, nAnonymizationSalt)); - static const std::string filenameTestx = "testx (6).odt"; - CPPUNIT_ASSERT_EQUAL(std::string("#2#2df#.odt"), Util::anonymizeUrl(filenameTestx)); - Util::mapAnonymized("testx (6)", "testx (6)"); - CPPUNIT_ASSERT_EQUAL(filenameTestx, Util::anonymizeUrl(filenameTestx)); + CPPUNIT_ASSERT_EQUAL(std::string("#1#8f8d95bd2a202d00#.odt"), + Util::anonymizeUrl(filenameTestx, nAnonymizationSalt)); + Util::mapAnonymized("testx (6)", + "testx (6)"); // Identity map of the filename without extension. + CPPUNIT_ASSERT_EQUAL(filenameTestx, Util::anonymizeUrl(filenameTestx, nAnonymizationSalt)); - static const std::string path = "/path/to/filename.ext"; - CPPUNIT_ASSERT_EQUAL(path, Util::anonymizeUrl(path)); + CPPUNIT_ASSERT_EQUAL(path, Util::anonymizeUrl(path, nAnonymizationSalt)); - static const std::string plainUrl = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/736_ocgdpzbkm39u?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&permission=edit"; - const std::string urlAnonymized = Util::replace(plainUrl, "736_ocgdpzbkm39u", "#3#5a1#"); - CPPUNIT_ASSERT_EQUAL(urlAnonymized, Util::anonymizeUrl(plainUrl)); + const std::string urlAnonymized = Util::replace(plainUrl, "736_ocgdpzbkm39u", "#3#22c6f0caad277666#"); + CPPUNIT_ASSERT_EQUAL(urlAnonymized, Util::anonymizeUrl(plainUrl, nAnonymizationSalt)); Util::mapAnonymized("736_ocgdpzbkm39u", "736_ocgdpzbkm39u"); - CPPUNIT_ASSERT_EQUAL(plainUrl, Util::anonymizeUrl(plainUrl)); + CPPUNIT_ASSERT_EQUAL(plainUrl, Util::anonymizeUrl(plainUrl, nAnonymizationSalt)); - static const std::string fileUrl = "http://localhost/owncloud/index.php/apps/richdocuments/wopi/files/736_ocgdpzbkm39u/secret.odt?access_token=Hn0zttjbwkvGWb5BHbDa5ArgTykJAyBl&access_token_ttl=0&permission=edit"; - const std::string urlAnonymized2 = Util::replace(fileUrl, "secret", "#4#286#"); - CPPUNIT_ASSERT_EQUAL(urlAnonymized2, Util::anonymizeUrl(fileUrl)); + const std::string urlAnonymized2 = Util::replace(fileUrl, "secret", "#4#294f0dfb18f6a80b#"); + CPPUNIT_ASSERT_EQUAL(urlAnonymized2, Util::anonymizeUrl(fileUrl, nAnonymizationSalt)); Util::mapAnonymized("secret", "736_ocgdpzbkm39u"); const std::string urlAnonymized3 = Util::replace(fileUrl, "secret", "736_ocgdpzbkm39u"); - CPPUNIT_ASSERT_EQUAL(urlAnonymized3, Util::anonymizeUrl(fileUrl)); + CPPUNIT_ASSERT_EQUAL(urlAnonymized3, Util::anonymizeUrl(fileUrl, nAnonymizationSalt)); } CPPUNIT_TEST_SUITE_REGISTRATION(WhiteBoxTests); diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp index e3d95e235..574b54bd1 100644 --- a/wsd/LOOLWSD.cpp +++ b/wsd/LOOLWSD.cpp @@ -687,6 +687,7 @@ std::string LOOLWSD::ConfigFile = LOOLWSD_CONFIGDIR "/loolwsd.xml"; std::string LOOLWSD::ConfigDir = LOOLWSD_CONFIGDIR "/conf.d"; std::string LOOLWSD::LogLevel = "trace"; bool LOOLWSD::AnonymizeUserData = false; +std::uint64_t LOOLWSD::AnonymizationSalt = 82589933; Util::RuntimeConstant<bool> LOOLWSD::SSLEnabled; Util::RuntimeConstant<bool> LOOLWSD::SSLTermination; unsigned LOOLWSD::MaxConnections; @@ -965,7 +966,12 @@ void LOOLWSD::initialize(Application& self) LOG_INF("Anonymization of user-data is " << (AnonymizeUserData ? "enabled." : "disabled.")); if (AnonymizeUserData) - setenv("LOOL_ANONYMIZE_USER_DATA", "1", true); + { + // Get the salt, if set, otherwise default, and set as envar, so the kits inherit it. + AnonymizationSalt = getConfigValue<std::uint64_t>(conf, "logging.anonymize.anonymization_salt", 82589933); + const std::string sAnonymizationSalt = std::to_string(AnonymizationSalt); + setenv("LOOL_ANONYMIZATION_SALT", sAnonymizationSalt.c_str(), true); + } { std::string proto = getConfigValue<std::string>(conf, "net.proto", ""); diff --git a/wsd/LOOLWSD.hpp b/wsd/LOOLWSD.hpp index 19f783e60..cadc0e66f 100644 --- a/wsd/LOOLWSD.hpp +++ b/wsd/LOOLWSD.hpp @@ -65,6 +65,7 @@ public: static std::string LOKitVersion; static std::string LogLevel; static bool AnonymizeUserData; + static std::uint64_t AnonymizationSalt; static std::atomic<unsigned> NumConnections; static bool TileCachePersistent; static std::unique_ptr<TraceFileWriter> TraceDumper; @@ -141,14 +142,14 @@ public: /// Anonymize the basename of filenames, preserving the path and extension. static std::string anonymizeUrl(const std::string& url) { - return AnonymizeUserData ? Util::anonymizeUrl(url) : url; + return AnonymizeUserData ? Util::anonymizeUrl(url, AnonymizationSalt) : url; } /// Anonymize user names and IDs. /// Will use the Obfuscated User ID if one is provied via WOPI. static std::string anonymizeUsername(const std::string& username) { - return AnonymizeUserData ? Util::anonymize(username) : username; + return AnonymizeUserData ? Util::anonymize(username, AnonymizationSalt) : username; } int innerMain(); @@ -184,6 +185,7 @@ private: void operator()(int& value) { value = _config.getInt(_name); } void operator()(unsigned int& value) { value = _config.getUInt(_name); } + void operator()(unsigned long& value) { value = _config.getUInt64(_name); } void operator()(bool& value) { value = _config.getBool(_name); } void operator()(std::string& value) { value = _config.getString(_name); } void operator()(double& value) { value = _config.getDouble(_name); } _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits