[tor-commits] [torbirdy/master] Ensure compatibility for Thunderbird 59 (use proper imports)
commit b8e45f597969aa592e23c5f8dd34dfd397e9837f Author: Sukhbir SinghDate: Mon Feb 26 14:39:03 2018 -0500 Ensure compatibility for Thunderbird 59 (use proper imports) --- components/torbirdy.js | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/components/torbirdy.js b/components/torbirdy.js index 4dafef9..430c15f 100644 --- a/components/torbirdy.js +++ b/components/torbirdy.js @@ -1,7 +1,7 @@ var { interfaces: Ci, utils: Cu, classes: Cc } = Components; Cu.import("resource://gre/modules/XPCOMUtils.jsm"); -Cu.import("resource://gre/modules/imXPCOMUtils.jsm"); +Cu.import("resource:///modules/imXPCOMUtils.jsm"); Cu.import("resource://gre/modules/AddonManager.jsm"); Cu.import("resource://gre/modules/Preferences.jsm"); @@ -439,7 +439,6 @@ function TorBirdy() { this.setPrefs(); sanitizeDateHeaders(); - dump("TorBirdy registered!\n"); } TorBirdy.prototype = { @@ -471,7 +470,6 @@ TorBirdy.prototype = { onUninstalling: function(addon, needsRestart) { this.onStateChange(); if (addon.id == TB_ID) { - dump("TorBirdy uninstall requested\n"); this._uninstall = true; this.resetUserPrefs(); } @@ -480,7 +478,6 @@ TorBirdy.prototype = { onOperationCancelled: function(addon) { this.onStateChange(); if (addon.id == TB_ID) { - dump("Uninstall requested cancelled\n"); this._uninstall = false; this.setPrefs(); } @@ -491,14 +488,12 @@ TorBirdy.prototype = { }, resetUserPrefs: function() { -dump("Resetting user preferences to default\n"); // Clear the Thunderbird preferences we changed. for (let each in TorBirdyPrefs) { this.prefs.clearUserPref(each); } // Restore the older proxy preferences that were set prior to TorBirdy's install. -dump("Restoring proxy settings\n"); for (let i = 0; i < TorBirdyOldPrefs.length; i++) { var oldPref = TorBirdyOldPrefs[i]; var setValue = kRestoreBranch + oldPref; ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [torbirdy/master] Add locale for en-US (default English locale)
commit 39bd34681a709db42ad52c16d70daa0cd0f52644 Author: Sukhbir SinghDate: Mon Feb 26 14:47:33 2018 -0500 Add locale for en-US (default English locale) --- chrome/locale/en-US/torbirdy.dtd| 60 + chrome/locale/en-US/torbirdy.properties | 18 ++ 2 files changed, 78 insertions(+) diff --git a/chrome/locale/en-US/torbirdy.dtd b/chrome/locale/en-US/torbirdy.dtd new file mode 100644 index 000..c88803a --- /dev/null +++ b/chrome/locale/en-US/torbirdy.dtd @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/chrome/locale/en-US/torbirdy.properties b/chrome/locale/en-US/torbirdy.properties new file mode 100644 index 000..4f645eb --- /dev/null +++ b/chrome/locale/en-US/torbirdy.properties @@ -0,0 +1,18 @@ +torbirdy.name=TorBirdy + +torbirdy.enabled.tor=TorBirdy Enabled:Tor +torbirdy.enabled.jondo=TorBirdy Enabled:JonDo +torbirdy.enabled.custom=TorBirdy Enabled:Custom Proxy +torbirdy.enabled.torification=TorBirdy Enabled:Transparent Torification +torbirdy.enabled.whonix=TorBirdy Enabled:Whonix +torbirdy.disabled=TorBirdy:Disabled! +torbirdy.enabled=TorBirdy:Enabled + +torbirdy.email.prompt=TorBirdy has disabled Thunderbird's auto-configuration wizard to protect your anonymity.\n\nThe recommended security settings for %S have been set.\n\nYou can now configure the other account settings manually. + +torbirdy.email.advanced=Please note that changing the advanced settings of TorBirdy is NOT recommended.\n\nYou should only continue if you are sure of what you are doing. +torbirdy.email.advanced.nextwarning=Show this warning next time +torbirdy.email.advanced.title=TorBirdy Advanced Settings + +torbirdy.firstrun=You are now running TorBirdy.\n\nTo help protect your anonymity, TorBirdy will enforce the Thunderbird settings it has set, preventing them from being changed by you or by any add-on. 
There are some settings that can be changed and those are accessed through TorBirdy's preferences dialog. When TorBirdy is uninstalled or disabled, all settings that it changes are reset to their default values (the values prior to TorBirdy's install).\n\nIf you are a new user, it is recommended that you read through the TorBirdy website to understand what we are trying to accomplish with TorBirdy for our users. +torbirdy.website=https://trac.torproject.org/projects/tor/wiki/torbirdy ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [torbirdy/master] Drop support for versioned JavaScript (mozilla57; see bugzilla 1390106)
commit c9d9fa56d8f9eac0c5af0dfebf2d8857ff45e19a Author: Sukhbir SinghDate: Mon Feb 26 14:41:41 2018 -0500 Drop support for versioned JavaScript (mozilla57; see bugzilla 1390106) --- chrome/content/accountpref.xul | 2 +- chrome/content/firstruninfo.xul | 2 +- chrome/content/preferences.xul | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chrome/content/accountpref.xul b/chrome/content/accountpref.xul index baf419e..f460f64 100644 --- a/chrome/content/accountpref.xul +++ b/chrome/content/accountpref.xul @@ -14,7 +14,7 @@ onload="return org.torbirdy.accountprefs.onLoad();" ondialogaccept="return org.torbirdy.accountprefs.onAccept();"> - diff --git a/chrome/content/firstruninfo.xul b/chrome/content/firstruninfo.xul index 43f1d66..17fd73a 100644 --- a/chrome/content/firstruninfo.xul +++ b/chrome/content/firstruninfo.xul @@ -10,7 +10,7 @@ onload="return org.torbirdy.firstrun.onLoad();" ondialogaccept="window.close();"> - diff --git a/chrome/content/preferences.xul b/chrome/content/preferences.xul index 2e3c168..296f220 100644 --- a/chrome/content/preferences.xul +++ b/chrome/content/preferences.xul @@ -19,7 +19,7 @@ ondialogaccept="return org.torbirdy.prefs.onAccept();" onload="return org.torbirdy.prefs.onLoad();"> - ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [torbirdy/master] Update install.rdf
commit baccc82cc942daeaaeb369e885e1c7b6df40b690 Author: Sukhbir Singh Date: Mon Feb 26 14:45:41 2018 -0500 Update install.rdf - Bump version to 0.2.4 - Set minVersion to 52.0 and maxVersion to 59.* --- install.rdf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/install.rdf b/install.rdf index fefb89f..86d94b1 100644 --- a/install.rdf +++ b/install.rdf @@ -4,7 +4,7 @@ xmlns:em="http://www.mozilla.org/2004/em-rdf#"> castironthunderbirdc...@torproject.org -0.2.3 +0.2.4 2 chrome://castironthunderbirdclub/skin/images/tor.png chrome://castironthunderbirdclub/content/preferences.xul @@ -13,8 +13,8 @@ {3550f703-e582-4d05-9a08-453d09bdfdc6} - 45.0 - 52.* + 52.0 + 59.* ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [torbirdy/master] Update the ChangeLog
commit 38a66b01fecc11d188fffafdaf0ec32ddad3db77 Author: Sukhbir Singh Date: Mon Feb 26 14:48:35 2018 -0500 Update the ChangeLog --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 7d6869d..d95c5d2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,6 @@ 0.2.4, * Update translations, adding all supported languages from Transifex + * Added support for Thunderbird 58 and Thunderbird 59 0.2.3, 04 Aug 2017 * Bug 21880: Enable encrypted email headers for Enigmail (Memory Hole) ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [torbirdy/master] Update the changelog
commit 30f59fa5df0c6bb22b024ba6e33dfbe16a1ac935 Author: Sukhbir Singh Date: Tue Feb 13 12:38:59 2018 -0500 Update the changelog --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index fcf7a26..7d6869d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +0.2.4, + * Update translations, adding all supported languages from Transifex + 0.2.3, 04 Aug 2017 * Bug 21880: Enable encrypted email headers for Enigmail (Memory Hole) * Bug 22569: Update Enigmail values for custom proxy settings ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Use Java8 idiom for toString method.
commit 2457eb5be72d508c4ec4e2d2c3b6f7a88c69ed4c Author: iwakeh Date: Fri Oct 27 17:35:15 2017 +0000 Use Java8 idiom for toString method. --- .../org/torproject/collector/bridgedescs/DescriptorBuilder.java | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java index cc57662..4ca9dd1 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java +++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java @@ -5,6 +5,7 @@ package org.torproject.collector.bridgedescs; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; /** Builder for descriptors. * @@ -55,10 +56,7 @@ class DescriptorBuilder { @Override public String toString() { -StringBuilder full = new StringBuilder(); -for (Object part : this.parts) { - full.append(part.toString()); -} -return full.toString(); +return this.parts.stream().map(part -> part.toString()) +.collect(Collectors.joining("")); } } ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Adapt CollecTor to latest metrics-lib master branch.
commit fbb35f75da022a23912b937b1825d8f216abad07 Author: iwakeh Date: Tue Feb 20 16:30:08 2018 +0000 Adapt CollecTor to latest metrics-lib master branch. --- src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 53ad118..7601898 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -15,10 +15,10 @@ import org.torproject.collector.cron.CollecTorMain; import org.torproject.collector.persist.PersistenceUtils; import org.torproject.collector.persist.WebServerAccessLogPersistence; import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.Method; import org.torproject.descriptor.WebServerAccessLog; import org.torproject.descriptor.log.InternalLogDescriptor; import org.torproject.descriptor.log.InternalWebServerAccessLog; -import org.torproject.descriptor.log.Method; import org.torproject.descriptor.log.WebServerAccessLogImpl; import org.torproject.descriptor.log.WebServerAccessLogLine; ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Make logging statements comply to Metrics' standards.
commit 43cd15876635d763d0f6adbf6bcc5c7df6380406 Author: iwakehDate: Fri Oct 27 17:35:18 2017 + Make logging statements comply to Metrics' standards. Also edit here and there for more readability and less lines. --- .../bridgedescs/SanitizedBridgesWriter.java| 145 + 1 file changed, 62 insertions(+), 83 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java index b4cd49e..22bf8f7 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java +++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java @@ -167,9 +167,9 @@ public class SanitizedBridgesWriter extends CollecTorMain { && line.length() != ("-MM,".length() + 83 * 2)) || parts.length != 2) { logger.warn("Invalid line in bridge-ip-secrets file " -+ "starting with '" + line.substring(0, 7) + "'! " ++ "starting with '{}'! " + "Not calculating any IP address hashes in this " -+ "execution!"); ++ "execution!", line.substring(0, 7)); this.persistenceProblemWithSecrets = true; break; } @@ -178,19 +178,16 @@ public class SanitizedBridgesWriter extends CollecTorMain { this.secretsForHashingIpAddresses.put(month, secret); } if (!this.persistenceProblemWithSecrets) { - logger.debug("Read " - + this.secretsForHashingIpAddresses.size() + " secrets for " - + "hashing bridge IP addresses."); + logger.debug("Read {} secrets for hashing bridge IP addresses.", + this.secretsForHashingIpAddresses.size()); } } catch (DecoderException e) { -logger.warn("Failed to decode hex string in " -+ this.bridgeIpSecretsFile + "! Not calculating any IP " -+ "address hashes in this execution!", e); +logger.warn("Failed to decode hex string in {}! 
Not calculating any IP " ++ "address hashes in this execution!", this.bridgeIpSecretsFile, e); this.persistenceProblemWithSecrets = true; } catch (IOException e) { -logger.warn("Failed to read " -+ this.bridgeIpSecretsFile + "! Not calculating any IP " -+ "address hashes in this execution!", e); +logger.warn("Failed to read {}! Not calculating any IP " ++ "address hashes in this execution!", this.bridgeIpSecretsFile, e); this.persistenceProblemWithSecrets = true; } } @@ -490,8 +487,8 @@ public class SanitizedBridgesWriter extends CollecTorMain { } else if (line.startsWith("fingerprint ")) { if (!("fingerprint " + authorityFingerprint).equals(line)) { logger.warn("Mismatch between authority fingerprint expected from " -+ "file name (" + authorityFingerprint + ") and parsed from " -+ "\"fingerprint\" line (\"" + line + "\")."); ++ "file name ({}) and parsed from \"fingerprint\" " ++ "line (\"{}\").", authorityFingerprint, line); return; } header.append(line).newLine(); @@ -511,13 +508,13 @@ public class SanitizedBridgesWriter extends CollecTorMain { /* Parse the relevant parts of this r line. */ String[] parts = line.split(" "); if (parts.length < 9) { -logger.warn("Illegal line '" + line + "' in bridge network " -+ "status. Skipping descriptor."); +logger.warn("Illegal line '{}' in bridge network " ++ "status. Skipping descriptor.", line); return; } if (!Base64.isBase64(parts[2])) { -logger.warn("Illegal base64 character in r line '" + parts[2] -+ "'. Skipping descriptor."); +logger.warn("Illegal base64 character in r line '{}'. " ++ "Skipping descriptor.", parts[2]); return; } fingerprintBytes = Base64.decodeBase64(parts[2] + "=="); @@ -567,8 +564,8 @@ public class SanitizedBridgesWriter extends CollecTorMain { if (scrubbedOrAddress != null) { scrubbed.append("a " + scrubbedOrAddress + "\n"); } else { -logger.warn("Invalid address in line '" + line -+ "' in bridge network status. 
Skipping line!"); +logger.warn("Invalid address in line '{}' " ++ "in bridge network status. Skipping line!", line); } /* Nothing special about s, w, and p lines; just copy them. */ @@ -581,8 +578,8 @@ public class SanitizedBridgesWriter extends CollecTorMain { * network status. If there is, we should probably learn before
[tor-commits] [collector/release] Separate parsing and sanitizing steps for bridge descriptors.
commit d5aba97f9b6c4ee74735b183552b8435e5e0661b Author: Karsten LoesingDate: Fri Oct 27 19:26:57 2017 +0200 Separate parsing and sanitizing steps for bridge descriptors. First step towards implementing #20549. --- .../SanitizedBridgeDescriptorBuilder.java | 54 + .../bridgedescs/SanitizedBridgesWriter.java| 240 + 2 files changed, 161 insertions(+), 133 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java new file mode 100644 index 000..174a5ae --- /dev/null +++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java @@ -0,0 +1,54 @@ +package org.torproject.collector.bridgedescs; + +import java.util.ArrayList; +import java.util.List; + +/** Builder for sanitized bridge descriptors. + * + * This builder class can be used while parsing and sanitizing an original + * bridge descriptor. It accepts already sanitized {@code String}s and + * {@code StringBuilder}s as placeholders for parts that can only be sanitized + * after finishing the parsing step. 
*/ +class SanitizedBridgeDescriptorBuilder { + + private List descriptorParts; + + private StringBuilder lastDescriptorPart; + + SanitizedBridgeDescriptorBuilder() { +this.descriptorParts = new ArrayList<>(); +this.lastDescriptorPart = new StringBuilder(); +this.descriptorParts.add(this.lastDescriptorPart); + } + + SanitizedBridgeDescriptorBuilder append(String sanitizedString) { +this.lastDescriptorPart.append(sanitizedString); +return this; + } + + SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) { +this.descriptorParts.add(placeholder); +this.lastDescriptorPart = new StringBuilder(); +this.descriptorParts.add(this.lastDescriptorPart); +return this; + } + + SanitizedBridgeDescriptorBuilder space() { +this.lastDescriptorPart.append(' '); +return this; + } + + SanitizedBridgeDescriptorBuilder newLine() { +this.lastDescriptorPart.append('\n'); +return this; + } + + @Override + public String toString() { +StringBuilder fullDescriptor = new StringBuilder(); +for (StringBuilder descriptorPart : this.descriptorParts) { + fullDescriptor.append(descriptorPart); +} +return fullDescriptor.toString(); + } +} diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java index e257245..1ef1d60 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java +++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java @@ -34,6 +34,7 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedMap; @@ -671,23 +672,20 @@ public class SanitizedBridgesWriter extends CollecTorMain { } /* Parse descriptor to generate a sanitized version. 
*/ -String scrubbedDesc = null; +String address = null; String published = null; +byte[] fingerprintBytes = null; +StringBuilder scrubbedAddress = null; +Map scrubbedTcpPorts = new HashMap<>(); +Map scrubbedIpAddressesAndTcpPorts = new HashMap<>(); String masterKeyEd25519FromIdentityEd25519 = null; -try { - BufferedReader br = new BufferedReader(new StringReader( - new String(data, "US-ASCII"))); - StringBuilder scrubbed = new StringBuilder(); - String line = null; - byte[] fingerprintBytes = null; - String hashedBridgeIdentity = null; - String address = null; - String routerLine = null; - String scrubbedRouterLine = null; - String scrubbedAddress = null; +SanitizedBridgeDescriptorBuilder scrubbed = +new SanitizedBridgeDescriptorBuilder(); +try (BufferedReader br = new BufferedReader(new StringReader( +new String(data, "US-ASCII" { + scrubbed.append(Annotation.BridgeServer.toString()); + String line; String masterKeyEd25519 = null; - List orAddresses = null; - List scrubbedOrAddresses = null; boolean skipCrypto = false; while ((line = br.readLine()) != null) { @@ -706,15 +704,26 @@ public class SanitizedBridgesWriter extends CollecTorMain { return; } address = parts[2]; - routerLine = line; - -/* Store or-address parts in a list and sanitize them when we have - * read the fingerprint. */ + scrubbedAddress = new StringBuilder(); + StringBuilder scrubbedOrPort = new StringBuilder(); + scrubbedTcpPorts.put(scrubbedOrPort, parts[3]); +
[tor-commits] [collector/release] Add hasContent method to make even more use of DescriptorBuilder.
commit 5b68aaf8aa7c5f3769544061344e75f7884e87ef Author: iwakehDate: Fri Oct 27 17:35:19 2017 + Add hasContent method to make even more use of DescriptorBuilder. --- .../collector/bridgedescs/DescriptorBuilder.java | 4 +++ .../bridgedescs/SanitizedBridgesWriter.java| 42 +++--- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java index f530368..9d23adf 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java +++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java @@ -80,6 +80,10 @@ class DescriptorBuilder { return this; } + public boolean hasContent() { +return this.parts.size() > 1 || lastPart.length() > 0; + } + @Override public String toString() { if (!this.finalized) { diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java index 22bf8f7..af54e03 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java +++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java @@ -462,7 +462,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { boolean includesFingerprintLine = false; SortedMap scrubbedLines = new TreeMap<>(); try { - StringBuilder scrubbed = new StringBuilder(); + DescriptorBuilder scrubbed = new DescriptorBuilder(); BufferedReader br = new BufferedReader(new StringReader(new String( data, "US-ASCII"))); String line = null; @@ -499,10 +499,10 @@ public class SanitizedBridgesWriter extends CollecTorMain { } else if (line.startsWith("r ")) { /* Clear buffer from previously scrubbed lines. 
*/ - if (scrubbed.length() > 0) { + if (scrubbed.hasContent()) { String scrubbedLine = scrubbed.toString(); scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); -scrubbed = new StringBuilder(); +scrubbed = new DescriptorBuilder(); } /* Parse the relevant parts of this r line. */ @@ -549,11 +549,13 @@ public class SanitizedBridgesWriter extends CollecTorMain { fingerprintBytes, descPublicationTime); String scrubbedDirPort = this.scrubTcpPort(dirPort, fingerprintBytes, descPublicationTime); - scrubbed.append("r " + nickname + " " - + hashedBridgeIdentityBase64 + " " - + hashedDescriptorIdentifier + " " + descPublicationTime - + " " + scrubbedAddress + " " + scrubbedOrPort + " " - + scrubbedDirPort + "\n"); + scrubbed.append("r ").append(nickname).space() + .append(hashedBridgeIdentityBase64).space() + .append(hashedDescriptorIdentifier).space() + .append(descPublicationTime).space() + .append(scrubbedAddress).space() + .append(scrubbedOrPort).space() + .append(scrubbedDirPort).newLine(); /* Sanitize any addresses in a lines using the fingerprint and * descriptor publication time from the previous r line. */ @@ -562,7 +564,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { line.substring("a ".length()), fingerprintBytes, descPublicationTime); if (scrubbedOrAddress != null) { -scrubbed.append("a " + scrubbedOrAddress + "\n"); +scrubbed.append("a ").append(scrubbedOrAddress).newLine(); } else { logger.warn("Invalid address in line '{}' " + "in bridge network status. Skipping line!", line); @@ -572,7 +574,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { } else if (line.startsWith("s ") || line.equals("s") || line.startsWith("w ") || line.equals("w") || line.startsWith("p ") || line.equals("p")) { - scrubbed.append(line + "\n"); + scrubbed.append(line).newLine(); /* There should be nothing else but r, a, w, p, and s lines in the * network status. 
If there is, we should probably learn before @@ -584,10 +586,10 @@ public class SanitizedBridgesWriter extends CollecTorMain { } } br.close(); - if (scrubbed.length() > 0) { + if (scrubbed.hasContent()) { String scrubbedLine = scrubbed.toString(); scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); -scrubbed = new StringBuilder(); +scrubbed = new DescriptorBuilder(); } if (!includesFingerprintLine) { header.append("fingerprint ").append(authorityFingerprint).newLine(); @@ -1119,7 +1121,7 @@ public
[tor-commits] [collector/release] Rename SanitizedBridgeDescriptorBuilder to DescriptorBuilder.
commit 266051f3397bb0f676054ce2459502f680bfab6d Author: iwakehDate: Fri Oct 27 17:35:10 2017 + Rename SanitizedBridgeDescriptorBuilder to DescriptorBuilder. The class doesn't 'know' about descriptor sanitization, it is only a sort of container for writing descriptors. It could be actually moved to some util package and used in other parsing steps, too. Also rename test helper classes to avoid naming conflicts. Remove 'descriptor' from variable names. Make DescriptorBuilder public. Adapt other classes as well as tests. --- .../collector/bridgedescs/DescriptorBuilder.java | 57 + .../SanitizedBridgeDescriptorBuilder.java | 54 .../bridgedescs/SanitizedBridgesWriter.java| 4 +- ...er.java => ExtraInfoTestDescriptorBuilder.java} | 4 +- ...ava => NetworkStatusTestDescriptorBuilder.java} | 4 +- .../bridgedescs/SanitizedBridgesWriterTest.java| 142 +++-- ...ilder.java => ServerTestDescriptorBuilder.java} | 4 +- ...TarballBuilder.java => TarballTestBuilder.java} | 12 +- ...ptorBuilder.java => TestDescriptorBuilder.java} | 2 +- 9 files changed, 145 insertions(+), 138 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java new file mode 100644 index 000..12a8956 --- /dev/null +++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java @@ -0,0 +1,57 @@ +/* Copyright 2018 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.bridgedescs; + +import java.util.ArrayList; +import java.util.List; + +/** Builder for descriptors. + * + * This builder class can be used while parsing and processing an original + * descriptor. It accepts {@code String}s, {@code DescriptorBuilder}s and + * {@code StringBuilder}s. The latter two as placeholders for parts that can + * only be processed after finishing the parsing step. 
*/ +class DescriptorBuilder { + + private List parts; + + private StringBuilder lastPart; + + public DescriptorBuilder() { +this.parts = new ArrayList<>(); +this.lastPart = new StringBuilder(); +this.parts.add(this.lastPart); + } + + public DescriptorBuilder append(String sanitizedString) { +this.lastPart.append(sanitizedString); +return this; + } + + public DescriptorBuilder append(StringBuilder placeholder) { +this.parts.add(placeholder); +this.lastPart = new StringBuilder(); +this.parts.add(this.lastPart); +return this; + } + + public DescriptorBuilder space() { +this.lastPart.append(' '); +return this; + } + + public DescriptorBuilder newLine() { +this.lastPart.append('\n'); +return this; + } + + @Override + public String toString() { +StringBuilder full = new StringBuilder(); +for (StringBuilder part : this.parts) { + full.append(part); +} +return full.toString(); + } +} diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java deleted file mode 100644 index 174a5ae..000 --- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.torproject.collector.bridgedescs; - -import java.util.ArrayList; -import java.util.List; - -/** Builder for sanitized bridge descriptors. - * - * This builder class can be used while parsing and sanitizing an original - * bridge descriptor. It accepts already sanitized {@code String}s and - * {@code StringBuilder}s as placeholders for parts that can only be sanitized - * after finishing the parsing step. 
*/ -class SanitizedBridgeDescriptorBuilder { - - private List descriptorParts; - - private StringBuilder lastDescriptorPart; - - SanitizedBridgeDescriptorBuilder() { -this.descriptorParts = new ArrayList<>(); -this.lastDescriptorPart = new StringBuilder(); -this.descriptorParts.add(this.lastDescriptorPart); - } - - SanitizedBridgeDescriptorBuilder append(String sanitizedString) { -this.lastDescriptorPart.append(sanitizedString); -return this; - } - - SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) { -this.descriptorParts.add(placeholder); -this.lastDescriptorPart = new StringBuilder(); -this.descriptorParts.add(this.lastDescriptorPart); -return this; - } - - SanitizedBridgeDescriptorBuilder space() { -this.lastDescriptorPart.append(' '); -return this; - } - - SanitizedBridgeDescriptorBuilder newLine() { -this.lastDescriptorPart.append('\n'); -return this; - } - - @Override - public String toString() { -StringBuilder fullDescriptor = new StringBuilder(); -for (StringBuilder descriptorPart : this.descriptorParts) { -
[tor-commits] [collector/release] Use DescriptorBuilder more often.
commit 4e61bb792bc4cd4db9df6eb49ab88890b34ff489 Author: iwakehDate: Fri Oct 27 17:35:17 2017 + Use DescriptorBuilder more often. Add convenience constructor accepting the first string as argument. --- .../torproject/collector/bridgedescs/DescriptorBuilder.java | 5 + .../collector/bridgedescs/SanitizedBridgesWriter.java| 12 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java index 9c47b5e..f530368 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java +++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java @@ -34,6 +34,11 @@ class DescriptorBuilder { this.parts.add(this.lastPart); } + public DescriptorBuilder(String firstString) { +this(); +this.append(firstString); + } + private void throwExceptionIfFinalized() { if (this.finalized) { throw new IllegalStateException("This DescriptorBuilder is finalized and" diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java index 1d264a5..b4cd49e 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java +++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java @@ -461,7 +461,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { } /* Parse the given network status line by line. */ -StringBuilder header = new StringBuilder(); +DescriptorBuilder header = new DescriptorBuilder(); boolean includesFingerprintLine = false; SortedMap scrubbedLines = new TreeMap<>(); try { @@ -483,7 +483,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { /* Additional header lines don't have to be cleaned up. 
*/ } else if (line.startsWith("flag-thresholds ")) { - header.append(line + "\n"); + header.append(line).newLine(); /* The authority fingerprint in the "fingerprint" line can go in * unscrubbed. */ @@ -494,7 +494,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { + "\"fingerprint\" line (\"" + line + "\")."); return; } - header.append(line + "\n"); + header.append(line).newLine(); includesFingerprintLine = true; /* r lines contain sensitive information that needs to be removed @@ -593,7 +593,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { scrubbed = new StringBuilder(); } if (!includesFingerprintLine) { -header.append("fingerprint ").append(authorityFingerprint).append("\n"); +header.append("fingerprint ").append(authorityFingerprint).newLine(); } /* Check if we can tell from the descriptor publication times @@ -879,13 +879,13 @@ public class SanitizedBridgesWriter extends CollecTorMain { /* Replace node fingerprints in the family line with their hashes * and leave nicknames unchanged. */ } else if (line.startsWith("family ")) { - StringBuilder familyLine = new StringBuilder("family"); + DescriptorBuilder familyLine = new DescriptorBuilder("family"); for (String s : line.substring(7).split(" ")) { if (s.startsWith("$")) { familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex( s.substring(1).toCharArray())).toUpperCase()); } else { - familyLine.append(" ").append(s); + familyLine.space().append(s); } } scrubbed.append(familyLine.toString()).newLine(); ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Reduce memory footprint and wall time.
commit 8557bf6255e6e3745088033e8e7bad7801421686 Author: iwakehDate: Tue Feb 20 16:30:09 2018 + Reduce memory footprint and wall time. Adapt to latest changes of metrics-lib (task-25329) and make use of the high redundancy of logs (e.g. a 3G file might only contain 350 different lines). This avoids OOM and array out of bounds exceptions for large files (>2G) and gives a speed-up of roughly 50%. (The earlier 66min are down to 34min for meronense files plus two larger files.) There is a BATCH constant, which could be tuned for processing speed. It is logged for each webstats module run. Currently, it is set to 100k. This was more or less arbitrarily chosen and used for all the tests. A test run using 500k didn't show significant differences. --- .../persist/WebServerAccessLogPersistence.java | 8 --- .../collector/webstats/SanitizeWeblogs.java| 61 ++ 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java index 792d3a9..dab4112 100644 --- a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java +++ b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java @@ -5,7 +5,6 @@ package org.torproject.collector.persist; import org.torproject.descriptor.WebServerAccessLog; import org.torproject.descriptor.internal.FileType; -import org.torproject.descriptor.log.InternalLogDescriptor; import org.torproject.descriptor.log.InternalWebServerAccessLog; import org.slf4j.Logger; @@ -30,13 +29,6 @@ public class WebServerAccessLogPersistence /** Prepare storing the given descriptor. */ public WebServerAccessLogPersistence(WebServerAccessLog desc) { super(desc, new byte[0]); -byte[] compressedBytes = null; -try { // The descriptor bytes have to be stored compressed. 
- compressedBytes = COMPRESSION.compress(desc.getRawDescriptorBytes()); - ((InternalLogDescriptor)desc).setRawDescriptorBytes(compressedBytes); -} catch (Exception ex) { - log.warn("Cannot compress â{}â. Storing uncompressed.", ex); -} calculatePaths(); } diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 7601898..1f2e922 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -3,6 +3,7 @@ package org.torproject.collector.webstats; +import static java.util.stream.Collectors.counting; import static java.util.stream.Collectors.groupingByConcurrent; import static java.util.stream.Collectors.toList; @@ -17,6 +18,7 @@ import org.torproject.collector.persist.WebServerAccessLogPersistence; import org.torproject.descriptor.DescriptorParseException; import org.torproject.descriptor.Method; import org.torproject.descriptor.WebServerAccessLog; +import org.torproject.descriptor.internal.FileType; import org.torproject.descriptor.log.InternalLogDescriptor; import org.torproject.descriptor.log.InternalWebServerAccessLog; import org.torproject.descriptor.log.WebServerAccessLogImpl; @@ -26,8 +28,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; -import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.InputStreamReader; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; @@ -40,6 +43,7 @@ import java.util.SortedSet; import java.util.StringJoiner; import java.util.TreeMap; import java.util.TreeSet; +import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -87,6 +91,7 @@ public class SanitizeWeblogs extends CollecTorMain { Set sources = this.config.getSourceTypeSet( Key.WebstatsSources); if 
(sources.contains(SourceType.Local)) { +log.info("Processing logs using batch value {}.", BATCH); findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins)); PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath)); } @@ -126,24 +131,60 @@ public class SanitizeWeblogs extends CollecTorMain { String name = new StringJoiner(InternalLogDescriptor.SEP) .add(virtualHost).add(physicalHost) .add(InternalWebServerAccessLog.MARKER) -.add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString(); +.add(date.format(DateTimeFormatter.BASIC_ISO_DATE)) +.toString() + "." + FileType.XZ.name().toLowerCase(); log.debug("Sanitizing {}.", name); -List retainedLines = lines +Map retainedLines = new TreeMap<>(lines .stream().parallel().map((line) ->
[tor-commits] [collector/release] Make DescriptorBuilder also accept DescriptorBuilders.
commit fbfa16c05b3f74acd60ccdf780568e7e1b0b9e1b Author: iwakehDate: Fri Oct 27 17:35:14 2017 + Make DescriptorBuilder also accept DescriptorBuilders. This might facilitate easier processing of descriptors. --- .../torproject/collector/bridgedescs/DescriptorBuilder.java | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java index 12a8956..cc57662 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java +++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java @@ -14,7 +14,7 @@ import java.util.List; * only be processed after finishing the parsing step. */ class DescriptorBuilder { - private List parts; + private List parts; private StringBuilder lastPart; @@ -36,6 +36,13 @@ class DescriptorBuilder { return this; } + public DescriptorBuilder append(DescriptorBuilder placeholder) { +this.parts.add(placeholder); +this.lastPart = new StringBuilder(); +this.parts.add(this.lastPart); +return this; + } + public DescriptorBuilder space() { this.lastPart.append(' '); return this; @@ -49,8 +56,8 @@ class DescriptorBuilder { @Override public String toString() { StringBuilder full = new StringBuilder(); -for (StringBuilder part : this.parts) { - full.append(part); +for (Object part : this.parts) { + full.append(part.toString()); } return full.toString(); } ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Add a finalized state to DescriptorBuilder.
commit afe07d8efd4dc94b9dfb9b5896002286ba71dc6d Author: iwakehDate: Fri Oct 27 17:35:16 2017 + Add a finalized state to DescriptorBuilder. To avoid possible inconsistencies DescriptorBuilder is finalized after the first call to 'toString' and cannot be altered anymore. Any attempt to add more leads to an IllegalStateException. --- .../collector/bridgedescs/DescriptorBuilder.java | 34 -- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java index 4ca9dd1..9c47b5e 100644 --- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java +++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java @@ -12,25 +12,43 @@ import java.util.stream.Collectors; * This builder class can be used while parsing and processing an original * descriptor. It accepts {@code String}s, {@code DescriptorBuilder}s and * {@code StringBuilder}s. The latter two as placeholders for parts that can - * only be processed after finishing the parsing step. */ + * only be processed after finishing the parsing step. + * + * Calling {@code toString} finalizes the builder and any subsequent + * method calls other than {@code toString} will result in an + * {@code IllegalStateException}. 
+ */ class DescriptorBuilder { private List parts; private StringBuilder lastPart; + private boolean finalized = false; + + private String value; + public DescriptorBuilder() { this.parts = new ArrayList<>(); this.lastPart = new StringBuilder(); this.parts.add(this.lastPart); } + private void throwExceptionIfFinalized() { +if (this.finalized) { + throw new IllegalStateException("This DescriptorBuilder is finalized and" + + " calling anything other than 'toString' is illegal."); +} + } + public DescriptorBuilder append(String sanitizedString) { +this.throwExceptionIfFinalized(); this.lastPart.append(sanitizedString); return this; } public DescriptorBuilder append(StringBuilder placeholder) { +this.throwExceptionIfFinalized(); this.parts.add(placeholder); this.lastPart = new StringBuilder(); this.parts.add(this.lastPart); @@ -38,6 +56,7 @@ class DescriptorBuilder { } public DescriptorBuilder append(DescriptorBuilder placeholder) { +this.throwExceptionIfFinalized(); this.parts.add(placeholder); this.lastPart = new StringBuilder(); this.parts.add(this.lastPart); @@ -45,18 +64,27 @@ class DescriptorBuilder { } public DescriptorBuilder space() { +this.throwExceptionIfFinalized(); this.lastPart.append(' '); return this; } public DescriptorBuilder newLine() { +this.throwExceptionIfFinalized(); this.lastPart.append('\n'); return this; } @Override public String toString() { -return this.parts.stream().map(part -> part.toString()) -.collect(Collectors.joining("")); +if (!this.finalized) { + this.finalized = true; + this.value = this.parts.stream().map(part -> part.toString()) + .collect(Collectors.joining("")); + this.parts.clear(); // not needed anymore + this.lastPart = null; +} +return value; } + } ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Prepare for 1.5.0 release.
commit ddfa7bad243dc8e0a3105b14418794c87786e88f Author: Karsten LoesingDate: Mon Feb 26 14:23:05 2018 +0100 Prepare for 1.5.0 release. --- CERT | 20 ++-- CHANGELOG.md | 2 +- build.xml| 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/CERT b/CERT index d94c0c3..43be56e 100644 --- a/CERT +++ b/CERT @@ -1,8 +1,8 @@ -BEGIN CERTIFICATE- -MIIDaTCCAlGgAwIBAgIEZTniETANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV +MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU -b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw -ODE3MTg1MDQ0WhcNMTcxMTE1MTg1MDQ0WjBlMQswCQYDVQQGEwJVUzELMAkGA1UE +b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw +MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU @@ -11,11 +11,11 @@ Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud -DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAbsAc -gwl5KJH3pVKw4b+ACCOMgW+27MisCFbT1Izq2Wx+JcLMt3N//MoIpYOZWhsIeazW -/NE0fNbkLi0IYA0F1nUC9pHl44Hd8Gjfqa/YQUi9ALtgsY7l6W0sceW8WnZ8bu8J -DfrqnmB0bD2xc9ZjOn58al8dVjVWs95M87D9WCRU6LiaKFj5c45wciABQsTmC0qD -pyHYOaSGtXxXKDw5pAntdtHkCbowV5tDi/QQ8Tg7i5O7xwSh71Q7TZiNFMpLomBL -QllHfTZryFmoHyGn5MfngBUVCVHig5nXmk0dUMGuLiK4789dkgiPRz0vpB5Yf8Yy -CCE2jB6VBi2g5fMx0w== +DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU +qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x +RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq +MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO 
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG +qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW +2DiJiA8BLxZToe2XDA== -END CERTIFICATE- diff --git a/CHANGELOG.md b/CHANGELOG.md index a0b5d1f..eb14839 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# Changes in version 1.5.0 - 2018-01-31 +# Changes in version 1.5.0 - 2018-02-26 * Major changes - Update to metrics-lib 2.2.0. diff --git a/build.xml b/build.xml index 48f6e33..ff8302b 100644 --- a/build.xml +++ b/build.xml @@ -8,7 +8,7 @@ - + ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Circumvent Collection (integer) size limit.
commit d05b4e4aee3bc15c3e4d5bac660dfcee5bc26279 Author: iwakehDate: Tue Feb 20 16:30:14 2018 + Circumvent Collection (integer) size limit. Clean log lines immediately when they are read and also make use of sanitized log's high redundancy immediately, i.e., continue with maps of >. Rename method(s) to reflect what they do. --- .../collector/webstats/SanitizeWeblogs.java| 89 -- 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 1f2e922..5a270dd 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -4,8 +4,10 @@ package org.torproject.collector.webstats; import static java.util.stream.Collectors.counting; +import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.groupingByConcurrent; -import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.reducing; +import static java.util.stream.Collectors.summingLong; import org.torproject.collector.conf.Configuration; import org.torproject.collector.conf.ConfigurationException; @@ -35,6 +37,8 @@ import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; @@ -111,35 +115,36 @@ public class SanitizeWeblogs extends CollecTorMain { : virtualEntry.getValue().entrySet()) { String physicalHost = physicalEntry.getKey(); log.info("Processing logs for {} on {}.", virtualHost, physicalHost); -Map linesByDate +Map > linesByDate = physicalEntry.getValue().values().stream().parallel() -.flatMap((LogMetadata metadata) -> lineStream(metadata) - .filter((line) -> line.isValid())).parallel() 
-.collect(groupingByConcurrent(WebServerAccessLogLine::getDate)); +.flatMap(metadata -> sanitzedLineStream(metadata).entrySet() +.stream()) +.collect(groupingBy(Map.Entry::getKey, + reducing(Collections.emptyMap(), Map.Entry::getValue, +(e1, e2) -> Stream.concat(e1.entrySet().stream(), e2.entrySet() + .stream()) + .collect(groupingByConcurrent(Map.Entry::getKey, + summingLong(Map.Entry::getValue)); LocalDate[] interval = determineInterval(linesByDate.keySet()); linesByDate.entrySet().stream() .filter((entry) -> entry.getKey().isAfter(interval[0]) && entry.getKey().isBefore(interval[1])).parallel() -.forEach((entry) -> storeSanitized(virtualHost, physicalHost, +.forEach((entry) -> storeSortedAndForget(virtualHost, physicalHost, entry.getKey(), entry.getValue())); } } } - private void storeSanitized(String virtualHost, String physicalHost, - LocalDate date, List lines) { + private void storeSortedAndForget(String virtualHost, String physicalHost, + LocalDate date, Map lineCounts) { String name = new StringJoiner(InternalLogDescriptor.SEP) .add(virtualHost).add(physicalHost) .add(InternalWebServerAccessLog.MARKER) .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)) .toString() + "." 
+ FileType.XZ.name().toLowerCase(); -log.debug("Sanitizing {}.", name); -Map retainedLines = new TreeMap<>(lines -.stream().parallel().map((line) -> sanitize(line, date)) -.filter((line) -> line.isPresent()) -.map((line) -> line.get()) -.collect(groupingByConcurrent(line -> line, counting(; -lines.clear(); // not needed anymore +log.debug("Storing {}.", name); +Map retainedLines = new TreeMap<>(lineCounts); +lineCounts.clear(); // not needed anymore try { WebServerAccessLogPersistence walp = new WebServerAccessLogPersistence( @@ -187,8 +192,8 @@ public class SanitizeWeblogs extends CollecTorMain { .collect(Collectors.joining("\n", "", "\n")).getBytes(); } - static Optional sanitize(WebServerAccessLogLine logLine, - LocalDate date) { + static Optional + sanitize(WebServerAccessLogLine logLine) { if (!logLine.isValid() || !(Method.GET == logLine.getMethod() || Method.HEAD == logLine.getMethod()) @@ -203,10 +208,13 @@ public class SanitizeWeblogs extends CollecTorMain { if (queryStart > 0) { logLine.setRequest(logLine.getRequest().substring(0, queryStart)); } -
[tor-commits] [collector/release] Add webstats module with sync and local import functionality.
commit 97e577ae73ec631ac5d7448cb9f525594baa0c8a Author: iwakehDate: Mon Oct 9 12:23:53 2017 + Add webstats module with sync and local import functionality. Implements task-22428. --- CHANGELOG.md | 6 +- build.xml | 2 +- src/main/java/org/torproject/collector/Main.java | 2 + .../torproject/collector/conf/Configuration.java | 3 +- .../java/org/torproject/collector/conf/Key.java| 9 +- .../collector/persist/DescriptorPersistence.java | 2 + .../persist/WebServerAccessLogPersistence.java | 73 .../torproject/collector/sync/SyncPersistence.java | 7 + .../torproject/collector/webstats/LogFileMap.java | 115 .../torproject/collector/webstats/LogMetadata.java | 87 + .../collector/webstats/SanitizeWeblogs.java| 198 + src/main/resources/collector.properties| 20 ++- .../collector/conf/ConfigurationTest.java | 2 +- .../collector/cron/CollecTorMainTest.java | 1 + .../collector/sync/SyncPersistenceTest.java| 68 +++ .../collector/webstats/LogFileMapTest.java | 33 .../collector/webstats/LogMetadataTest.java| 82 + ...eotrichon.torproject.org_access.log_20151007.xz | Bin 0 -> 4056 bytes ...meronense.torproject.org_access.log_20170531.gz | Bin 0 -> 388 bytes 19 files changed, 671 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f4cd21..a0b5d1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -# Changes in version 1.?.? - 201?-??-?? +# Changes in version 1.5.0 - 2018-01-31 + + * Major changes + - Update to metrics-lib 2.2.0. + - Add new module for processing and sanitizing Tor web server logs. * Minor changes - Exclude lastModifiedMillis in index.json. 
diff --git a/build.xml b/build.xml index f004f29..48f6e33 100644 --- a/build.xml +++ b/build.xml @@ -11,7 +11,7 @@ - + diff --git a/src/main/java/org/torproject/collector/Main.java b/src/main/java/org/torproject/collector/Main.java index 50cc8be..70cdbfa 100644 --- a/src/main/java/org/torproject/collector/Main.java +++ b/src/main/java/org/torproject/collector/Main.java @@ -14,6 +14,7 @@ import org.torproject.collector.exitlists.ExitListDownloader; import org.torproject.collector.index.CreateIndexJson; import org.torproject.collector.onionperf.OnionPerfDownloader; import org.torproject.collector.relaydescs.ArchiveWriter; +import org.torproject.collector.webstats.SanitizeWeblogs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,6 +52,7 @@ public class Main { collecTorMains.put(Key.UpdateindexActivated, CreateIndexJson.class); collecTorMains.put(Key.RelaydescsActivated, ArchiveWriter.class); collecTorMains.put(Key.OnionPerfActivated, OnionPerfDownloader.class); +collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class); } private static Configuration conf = new Configuration(); diff --git a/src/main/java/org/torproject/collector/conf/Configuration.java b/src/main/java/org/torproject/collector/conf/Configuration.java index 57f9731..72bd5fc 100644 --- a/src/main/java/org/torproject/collector/conf/Configuration.java +++ b/src/main/java/org/torproject/collector/conf/Configuration.java @@ -92,7 +92,8 @@ public class Configuration extends Observable implements Cloneable { || this.getBool(Key.BridgedescsActivated) || this.getBool(Key.ExitlistsActivated) || this.getBool(Key.UpdateindexActivated) -|| this.getBool(Key.OnionPerfActivated))) { +|| this.getBool(Key.OnionPerfActivated) +|| this.getBool(Key.WebstatsActivated))) { throw new ConfigurationException("Nothing is activated!\n" + "Please edit collector.properties. 
Exiting."); } diff --git a/src/main/java/org/torproject/collector/conf/Key.java b/src/main/java/org/torproject/collector/conf/Key.java index e0a20a7..6454009 100644 --- a/src/main/java/org/torproject/collector/conf/Key.java +++ b/src/main/java/org/torproject/collector/conf/Key.java @@ -28,6 +28,7 @@ public enum Key { BridgeSources(SourceType[].class), ExitlistSources(SourceType[].class), OnionPerfSources(SourceType[].class), + WebstatsSources(SourceType[].class), RelayCacheOrigins(String[].class), RelayLocalOrigins(Path.class), RelaySyncOrigins(URL[].class), @@ -35,6 +36,8 @@ public enum Key { BridgeLocalOrigins(Path.class), ExitlistSyncOrigins(URL[].class), OnionPerfSyncOrigins(URL[].class), + WebstatsSyncOrigins(URL[].class), + WebstatsLocalOrigins(Path.class), BridgedescsActivated(Boolean.class), BridgedescsOffsetMinutes(Integer.class), BridgedescsPeriodMinutes(Integer.class), @@ -58,7 +61,11 @@ public enum Key { KeepDirectoryArchiveImportHistory(Boolean.class), ReplaceIpAddressesWithHashes(Boolean.class),
[tor-commits] [collector/release] Exclude lastModifiedMillis in index.json.
commit b23232bd44c82defee92cbe1d697cafb7862205a Author: Karsten LoesingDate: Thu Dec 14 10:13:11 2017 +0100 Exclude lastModifiedMillis in index.json. Fixes #24621. --- CHANGELOG.md | 6 ++ src/main/java/org/torproject/collector/index/CreateIndexJson.java | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c924f5e..2f4cd21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# Changes in version 1.?.? - 201?-??-?? + + * Minor changes + - Exclude lastModifiedMillis in index.json. + + # Changes in version 1.4.1 - 2017-10-26 * Medium changes diff --git a/src/main/java/org/torproject/collector/index/CreateIndexJson.java b/src/main/java/org/torproject/collector/index/CreateIndexJson.java index 5c4daf9..c4399b8 100644 --- a/src/main/java/org/torproject/collector/index/CreateIndexJson.java +++ b/src/main/java/org/torproject/collector/index/CreateIndexJson.java @@ -163,7 +163,8 @@ public class CreateIndexJson extends CollecTorMain { private void writeIndex(IndexNode indexNode) throws Exception { indexJsonFile.getParentFile().mkdirs(); -Gson gson = new GsonBuilder().create(); +Gson gson = new GsonBuilder().excludeFieldsWithoutExposeAnnotation() +.create(); String indexNodeString = gson.toJson(indexNode); for (String filename : new String[] {indexJsonFile.toString(), indexJsonFile + ".gz", indexJsonFile + ".xz", indexJsonFile + ".bz2"}) { ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Avoid repeated validation of clean and validated log lines.
commit 06d1a81d4cfe8ce89caa6cf49124f6700b33f522 Author: iwakeh Date: Wed Jan 31 13:31:28 2018 +0000 Avoid repeated validation of clean and validated log lines. --- src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 635457c..53ad118 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -136,7 +136,7 @@ public class SanitizeWeblogs extends CollecTorMain { try { WebServerAccessLogPersistence walp = new WebServerAccessLogPersistence( - new WebServerAccessLogImpl(retainedLines, name)); + new WebServerAccessLogImpl(retainedLines, name, false)); log.debug("Storing {}.", name); walp.storeOut(this.outputPathName); walp.storeRecent(this.recentPathName); ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Bump version to 1.4.1-dev.
commit 60dfface9783b5715717dd10fbd90c4dc93e4321 Author: Karsten Loesing Date: Thu Oct 26 10:16:35 2017 +0200 Bump version to 1.4.1-dev. --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index fb87709..f004f29 100644 --- a/build.xml +++ b/build.xml @@ -8,7 +8,7 @@ - + ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Update metrics-base.
commit ee7f1353a22b3d19857722b5b68604e2517012c5 Author: Karsten Loesing Date: Fri Dec 15 17:01:27 2017 +0100 Update metrics-base. --- src/build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/build b/src/build index 4b34756..23c6e0b 160000 --- a/src/build +++ b/src/build @@ -1 +1 @@ -Subproject commit 4b34756ddd71ccaf0fc30e5f5bf0a813a297d4a3 +Subproject commit 23c6e0be5fab9463f137615053ef412e4da2315e ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Parallelize two more processing steps.
commit 15db1e2a793ac7e67a1e7aa87c2ea857825a98a2 Author: iwakeh Date: Wed Jan 31 13:31:25 2018 +0000 Parallelize two more processing steps. --- src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index a3d2a7e..4496861 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -113,7 +113,7 @@ public class SanitizeWeblogs extends CollecTorMain { LocalDate[] interval = determineInterval(linesByDate.keySet()); linesByDate.entrySet().stream() .filter((entry) -> entry.getKey().isAfter(interval[0]) - && entry.getKey().isBefore(interval[1])) + && entry.getKey().isBefore(interval[1])).parallel() .forEach((entry) -> storeSanitized(virtualHost, physicalHost, entry.getKey(), entry.getValue())); } @@ -128,7 +128,7 @@ public class SanitizeWeblogs extends CollecTorMain { .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString(); log.debug("Sanitizing {}.", name); List retainedLines = lines -.stream().map((line) -> sanitize(line, date)) +.stream().parallel().map((line) -> sanitize(line, date)) .filter((line) -> line.isPresent()).map((line) -> line.get()) .collect(Collectors.toList()); retainedLines.sort(null); ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Update copyright to 2018.
commit 7f01208aedf150822f589375bf2cf45b3a6af5c0 Author: Karsten LoesingDate: Tue Jan 9 10:23:10 2018 +0100 Update copyright to 2018. --- src/main/java/org/torproject/collector/Main.java| 2 +- .../org/torproject/collector/bridgedescs/BridgeDescriptorParser.java| 2 +- .../java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java | 2 +- .../org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java| 2 +- src/main/java/org/torproject/collector/conf/Annotation.java | 2 +- src/main/java/org/torproject/collector/conf/Configuration.java | 2 +- src/main/java/org/torproject/collector/conf/ConfigurationException.java | 2 +- src/main/java/org/torproject/collector/conf/Key.java| 2 +- src/main/java/org/torproject/collector/conf/SourceType.java | 2 +- src/main/java/org/torproject/collector/cron/CollecTorMain.java | 2 +- src/main/java/org/torproject/collector/cron/Scheduler.java | 2 +- src/main/java/org/torproject/collector/cron/ShutdownHook.java | 2 +- .../java/org/torproject/collector/exitlists/ExitListDownloader.java | 2 +- src/main/java/org/torproject/collector/index/CreateIndexJson.java | 2 +- .../java/org/torproject/collector/onionperf/OnionPerfDownloader.java| 2 +- .../org/torproject/collector/persist/BridgeExtraInfoPersistence.java| 2 +- .../torproject/collector/persist/BridgeServerDescriptorPersistence.java | 2 +- .../java/org/torproject/collector/persist/ConsensusPersistence.java | 2 +- .../java/org/torproject/collector/persist/DescriptorPersistence.java| 2 +- src/main/java/org/torproject/collector/persist/ExitlistPersistence.java | 2 +- .../java/org/torproject/collector/persist/ExtraInfoPersistence.java | 2 +- .../org/torproject/collector/persist/MicroConsensusPersistence.java | 2 +- .../java/org/torproject/collector/persist/OnionPerfPersistence.java | 2 +- src/main/java/org/torproject/collector/persist/PersistenceUtils.java| 2 +- .../org/torproject/collector/persist/ServerDescriptorPersistence.java | 2 +- 
src/main/java/org/torproject/collector/persist/StatusPersistence.java | 2 +- src/main/java/org/torproject/collector/persist/VotePersistence.java | 2 +- src/main/java/org/torproject/collector/persist/package-info.java| 2 +- src/main/java/org/torproject/collector/relaydescs/ArchiveReader.java| 2 +- src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java| 2 +- .../torproject/collector/relaydescs/CachedRelayDescriptorReader.java| 2 +- src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java | 2 +- .../org/torproject/collector/relaydescs/RelayDescriptorDownloader.java | 2 +- .../java/org/torproject/collector/relaydescs/RelayDescriptorParser.java | 2 +- src/main/java/org/torproject/collector/sync/Criterium.java | 2 +- src/main/java/org/torproject/collector/sync/ProcessCriterium.java | 2 +- src/main/java/org/torproject/collector/sync/SyncManager.java| 2 +- src/main/java/org/torproject/collector/sync/SyncPersistence.java| 2 +- src/main/java/org/torproject/collector/sync/package-info.java | 2 +- src/main/resources/bootstrap-development.sh | 2 +- src/main/resources/create-tarballs.sh | 2 +- src/test/java/org/torproject/collector/MainTest.java| 2 +- .../torproject/collector/bridgedescs/BridgeDescriptorParserTest.java| 2 +- .../java/org/torproject/collector/bridgedescs/DescriptorBuilder.java| 2 +- .../torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java| 2 +- .../java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java | 2 +- .../torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java| 2 +- .../org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java | 2 +- src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java | 2 +- src/test/java/org/torproject/collector/conf/ConfigurationTest.java | 2 +- src/test/java/org/torproject/collector/cron/Broken.java | 2 +- src/test/java/org/torproject/collector/cron/CollecTorMainTest.java | 2 +- src/test/java/org/torproject/collector/cron/SchedulerTest.java | 2 +- 
src/test/java/org/torproject/collector/persist/PersistUtilsTest.java| 2 +- .../java/org/torproject/collector/relaydescs/ReferenceCheckerTest.java | 2 +- src/test/java/org/torproject/collector/sync/FileCollector.java | 2 +- src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java| 2 +- 57 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/torproject/collector/Main.java b/src/main/java/org/torproject/collector/Main.java index 1377fec..50cc8be 100644 --- a/src/main/java/org/torproject/collector/Main.java +++
[tor-commits] [collector/release] Use enum Method from metrics-lib.
commit 2a0aa8c7f8c956ba15c5780bc078d5330322fcd5 Author: iwakehDate: Wed Jan 31 13:31:24 2018 + Use enum Method from metrics-lib. --- src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 88d62fa..a3d2a7e 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -15,6 +15,7 @@ import org.torproject.descriptor.DescriptorParseException; import org.torproject.descriptor.WebServerAccessLog; import org.torproject.descriptor.log.InternalLogDescriptor; import org.torproject.descriptor.log.InternalWebServerAccessLog; +import org.torproject.descriptor.log.Method; import org.torproject.descriptor.log.WebServerAccessLogImpl; import org.torproject.descriptor.log.WebServerAccessLogLine; @@ -146,8 +147,8 @@ public class SanitizeWeblogs extends CollecTorMain { static Optional sanitize(WebServerAccessLogLine logLine, LocalDate date) { if (!logLine.isValid() -|| !("GET".equals(logLine.getMethod()) - || "HEAD".equals(logLine.getMethod())) +|| !(Method.GET == logLine.getMethod() + || Method.HEAD == logLine.getMethod()) || !logLine.getProtocol().startsWith("HTTP") || 400 == logLine.getResponse() || 404 == logLine.getResponse()) { return Optional.empty(); ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/release] Optimize parallel processing and use static imports for readability.
commit bd948070e03ff71503fdba84cff6bc61c9fbe452 Author: iwakehDate: Wed Jan 31 13:31:26 2018 + Optimize parallel processing and use static imports for readability. --- .../torproject/collector/webstats/SanitizeWeblogs.java | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 4496861..635457c 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -3,6 +3,9 @@ package org.torproject.collector.webstats; +import static java.util.stream.Collectors.groupingByConcurrent; +import static java.util.stream.Collectors.toList; + import org.torproject.collector.conf.Configuration; import org.torproject.collector.conf.ConfigurationException; import org.torproject.collector.conf.Key; @@ -37,7 +40,6 @@ import java.util.SortedSet; import java.util.StringJoiner; import java.util.TreeMap; import java.util.TreeSet; -import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -106,10 +108,9 @@ public class SanitizeWeblogs extends CollecTorMain { log.info("Processing logs for {} on {}.", virtualHost, physicalHost); Map linesByDate = physicalEntry.getValue().values().stream().parallel() -.flatMap((LogMetadata metadata) --> lineStream(metadata).filter((line) -> line.isValid())) -.collect(Collectors.groupingBy(WebServerAccessLogLine::getDate, -Collectors.toList())); +.flatMap((LogMetadata metadata) -> lineStream(metadata) + .filter((line) -> line.isValid())).parallel() +.collect(groupingByConcurrent(WebServerAccessLogLine::getDate)); LocalDate[] interval = determineInterval(linesByDate.keySet()); linesByDate.entrySet().stream() .filter((entry) -> entry.getKey().isAfter(interval[0]) @@ -130,7 +131,7 @@ public class SanitizeWeblogs extends CollecTorMain { List retainedLines = lines .stream().parallel().map((line) 
-> sanitize(line, date)) .filter((line) -> line.isPresent()).map((line) -> line.get()) -.collect(Collectors.toList()); +.collect(toList()); retainedLines.sort(null); try { WebServerAccessLogPersistence walp @@ -142,6 +143,7 @@ public class SanitizeWeblogs extends CollecTorMain { } catch (DescriptorParseException dpe) { log.error("Cannot store log desriptor {}.", name, dpe); } +lines.clear(); } static Optional sanitize(WebServerAccessLogLine logLine, @@ -188,7 +190,7 @@ public class SanitizeWeblogs extends CollecTorMain { metadata.fileType.decompress(Files.readAllBytes(metadata.path)) { return br.lines() .map((String line) -> WebServerAccessLogLine.makeLine(line)) - .collect(Collectors.toList()).stream(); + .collect(toList()).stream(); } catch (Exception ex) { log.debug("Skipping log-file {}.", metadata.path, ex); } ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [metrics-lib/release] Accommodate logs with more than Integer.MAX_VALUE lines.
commit 35feb816f81f26bcc9dc035a1aaf496c34a86647 Author: iwakehDate: Fri Feb 16 09:05:46 2018 + Accomodate logs with more than Integer.MAX_VALUE lines. Implements task-23046. --- .../org/torproject/descriptor/LogDescriptor.java | 10 +-- .../torproject/descriptor/WebServerAccessLog.java | 6 .../descriptor/log/WebServerAccessLogImpl.java | 32 ++ .../descriptor/log/LogDescriptorTest.java | 5 +++- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java index 826fcda..8dd8460 100644 --- a/src/main/java/org/torproject/descriptor/LogDescriptor.java +++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java @@ -5,6 +5,7 @@ package org.torproject.descriptor; import java.io.InputStream; import java.util.List; +import java.util.stream.Stream; /** * Contains a log file. @@ -64,11 +65,14 @@ public interface LogDescriptor extends Descriptor { public List getUnrecognizedLines(); /** - * Returns a list of all parseable log lines. - * Might require a lot of memory depending on log size. + * Returns a stream of all parseable log lines. + * Depending on log size this might not fit into a collection type. + * + * @since 2.2.0 */ - public List logLines() throws DescriptorParseException; + public Stream logLines() throws DescriptorParseException; + /** Base interface for accessing log lines. */ public interface Line { /** Returns a log line string. 
*/ diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java index b4f1940..5f3ad73 100644 --- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java +++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java @@ -62,6 +62,12 @@ public interface WebServerAccessLog extends LogDescriptor { @Override public List getUnrecognizedLines(); + /** + * Facilitates access to all log line fields that don't only contain + * default values post sanitization. + * + * @since 2.2.0 + */ public interface Line extends LogDescriptor.Line { /** Returns the IP address of the requesting host. */ diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java index e48a262..3666d5d 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java @@ -15,10 +15,11 @@ import java.io.File; import java.io.InputStreamReader; import java.time.LocalDate; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Implementation of web server access log descriptors. @@ -128,15 +129,34 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl return this.logDate; } - /** Returns a list of all valid log lines. */ + private static final int LISTLIMIT = Integer.MAX_VALUE / 2; + + /** Returns a stream of all valid log lines. 
*/ @Override - public List logLines() + public Stream logLines() throws DescriptorParseException { try (BufferedReader br = new BufferedReader(new InputStreamReader( this.decompressedByteStream( { - return br.lines().map(line - -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line)) -.filter(line -> line.isValid()).collect(Collectors.toList()); + List lists = new ArrayList<>(); + List currentList = new ArrayList<>(); + lists.add(currentList); + String lineStr = br.readLine(); + int count = 0; + while (null != lineStr) { +WebServerAccessLogLine wsal = WebServerAccessLogLine.makeLine(lineStr); +if (wsal.isValid()) { + currentList.add(wsal); + count++; +} +if (count >= LISTLIMIT) { + currentList = new ArrayList<>(); + lists.add(currentList); + count = 0; +} +lineStr = br.readLine(); + } + br.close(); + return lists.stream().flatMap(list -> list.stream()); } catch (Exception ex) { throw new DescriptorParseException("Cannot retrieve log lines.", ex); } diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java index 67ba638..0ff3e62 100644 --- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java +++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java @@ -3,6 +3,8 @@ package org.torproject.descriptor.log; +import static java.util.stream.Collectors.toList;
[tor-commits] [metrics-lib/release] Enable handling of larger (> 2G) log files.
commit c01dfbdc4e5d817fb10fb45f79d8223853cdeac0 Author: iwakehDate: Fri Feb 16 09:05:46 2018 + Enable handling of larger (> 2G) log files. As log files can be compressed very efficiently log descriptor raw bytes contain the compressed bytes. Added methods for accessing uncompressed log content, as well as stream based methods for decompression and compression in class FileType. Adapted all tests to the changes. Implements task-25329. --- .../org/torproject/descriptor/LogDescriptor.java | 26 -- .../torproject/descriptor/internal/FileType.java | 18 ++ .../descriptor/log/LogDescriptorImpl.java | 25 -- .../descriptor/log/WebServerAccessLogImpl.java | 12 +++ .../descriptor/log/LogDescriptorTest.java | 40 +++--- .../descriptor/log/WebServerModuleTest.java| 4 ++- 6 files changed, 82 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java index 6a6bf84..826fcda 100644 --- a/src/main/java/org/torproject/descriptor/LogDescriptor.java +++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java @@ -3,21 +3,34 @@ package org.torproject.descriptor; +import java.io.InputStream; import java.util.List; /** * Contains a log file. * * Unlike other descriptors, logs can get very large and are typically stored - * on disk in compressed form. However, all access to log contents through this - * interface and its subinterfaces is made available in uncompressed form. + * on disk in compressed form. Access to log contents through this + * interface and its subinterfaces is made available in compressed and + * decompressed form: + * + * The raw descriptor bytes are compressed, because logs contain + * often redundant information that can achieve high compression rates. + * For example, a 500kB compressed log file might be deflated to 3GB. + * The uncompressed log contents can be accessed as a stream of bytes. + * A list of log lines (decompressed) can be retrieved. 
+ * + * * * @since 2.2.0 */ public interface LogDescriptor extends Descriptor { /** - * Returns the decompressed raw descriptor bytes of the log. + * Returns the raw compressed descriptor bytes of the log. + * + * For access to the log's decompressed bytes of + * use method {@code decompressedByteStream}. * * @since 2.2.0 */ @@ -25,6 +38,13 @@ public interface LogDescriptor extends Descriptor { public byte[] getRawDescriptorBytes(); /** + * Returns the decompressed raw descriptor bytes of the log as stream. + * + * @since 2.2.0 + */ + public InputStream decompressedByteStream() throws DescriptorParseException; + + /** * Returns annotations found in the log file, which may be an empty List if a * log format does not support adding annotations. * diff --git a/src/main/java/org/torproject/descriptor/internal/FileType.java b/src/main/java/org/torproject/descriptor/internal/FileType.java index 353f0bb..2c07df6 100644 --- a/src/main/java/org/torproject/descriptor/internal/FileType.java +++ b/src/main/java/org/torproject/descriptor/internal/FileType.java @@ -93,6 +93,24 @@ public enum FileType { } /** + * Compresses the given InputStream and returns an OutputStream. + * + * @since 2.2.0 + */ + public OutputStream compress(OutputStream os) throws Exception { +return this.outputStream(os); + } + + /** + * Decompresses the given InputStream and returns an OutputStream. + * + * @since 2.2.0 + */ + public InputStream decompress(InputStream is) throws Exception { +return this.inputStream(is); + } + + /** * Decompresses the given bytes in memory and returns the decompressed bytes. 
* * @since 2.2.0 diff --git a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java index 97854e4..3583d26 100644 --- a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java +++ b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java @@ -14,10 +14,10 @@ import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; +import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.regex.Matcher; @@ -76,8 +76,7 @@ public abstract class LogDescriptorImpl this.fileType = FileType.findType(mat.group(1).toUpperCase()); if (FileType.PLAIN == this.fileType) { this.fileType = defaultCompression; - } else { -this.logBytes = this.fileType.decompress(this.logBytes); +this.logBytes = this.fileType.compress(this.logBytes); } } catch
[tor-commits] [metrics-lib/release] Prepare for 2.2.0 release, again.
commit 72f62fbe48d7cbb2de6d7fd5fd38b867fb0b185e Author: Karsten LoesingDate: Mon Feb 26 14:12:31 2018 +0100 Prepare for 2.2.0 release, again. --- CERT | 20 ++-- CHANGELOG.md | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CERT b/CERT index 0f90f7a..43be56e 100644 --- a/CERT +++ b/CERT @@ -1,8 +1,8 @@ -BEGIN CERTIFICATE- -MIIDaTCCAlGgAwIBAgIEM/uNRzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV +MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU -b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw -NTE2MTQ0NDAwWhcNMTcwODE0MTQ0NDAwWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE +b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw +MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU @@ -11,11 +11,11 @@ Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud -DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEACxUk -6eZkayjJGZjw02bWjrkwwe/N5iOnOz9XcpGQY5lUEk12uIYCYHDB66SsZngsbwNQ -qZ8T6Avm9XSrKAKMxBcPsjTUy0Fb7yG3jLRypPlbgoe61YoliYYaWiaV1OQY1nsZ -ujt6f7T5bwsLeh7La7+kB1SGDu+hKH7Bi3RtVN09iL7x+BJVuJOKHKYoK+UO2fB3 -ltFenkGpiDzGQQxJRzA1WbCQsSPrE6Qi3NmWXsTAc+24Y9gTO8qlmfi6zp1W7TGO -+cUmHIO0u3+ewkb8oIj8jjzGTmEbuJrwgBOohQr6CXCicQHhIrBF6Opi7mF/UHSO -J8d+DLvT41EL9JrgCg== +DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU +qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x +RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq +MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO 
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG +qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW +2DiJiA8BLxZToe2XDA== -END CERTIFICATE- diff --git a/CHANGELOG.md b/CHANGELOG.md index 91f00b8..4d87ba4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# Changes in version 2.2.0 - 2018-01-31 +# Changes in version 2.2.0 - 2018-02-26 * Major changes - Add new descriptor type WebServerAccessLog to parse web server ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [metrics-lib/release] Add log line interfaces and access methods.
commit 59689a9fa4c162378f347902eb68e4c21ccf0043 Author: iwakehDate: Tue Feb 6 14:59:05 2018 + Add log line interfaces and access methods. For both the general LogDescriptor and extension WebServerAccessLog. Include some new tests. --- .../org/torproject/descriptor/LogDescriptor.java | 12 ++ .../torproject/descriptor/{log => }/Method.java| 5 ++-- .../torproject/descriptor/WebServerAccessLog.java | 28 ++ .../descriptor/log/WebServerAccessLogImpl.java | 20 .../descriptor/log/WebServerAccessLogLine.java | 15 ++-- .../descriptor/log/LogDescriptorTest.java | 15 +++- 6 files changed, 85 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java index ff02cae..6a6bf84 100644 --- a/src/main/java/org/torproject/descriptor/LogDescriptor.java +++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java @@ -43,5 +43,17 @@ public interface LogDescriptor extends Descriptor { @Override public List getUnrecognizedLines(); + /** + * Returns a list of all parseable log lines. + * Might require a lot of memory depending on log size. + */ + public List logLines() throws DescriptorParseException; + + public interface Line { + +/** Returns a log line string. */ +public String toLogString(); + + } } diff --git a/src/main/java/org/torproject/descriptor/log/Method.java b/src/main/java/org/torproject/descriptor/Method.java similarity index 50% rename from src/main/java/org/torproject/descriptor/log/Method.java rename to src/main/java/org/torproject/descriptor/Method.java index c29d495..9135fe2 100644 --- a/src/main/java/org/torproject/descriptor/log/Method.java +++ b/src/main/java/org/torproject/descriptor/Method.java @@ -1,8 +1,9 @@ /* Copyright 2018 The Tor Project * See LICENSE for licensing information */ -package org.torproject.descriptor.log; +package org.torproject.descriptor; -public enum Method { +/** Enum for web server access log methods. 
*/ +public enum Method { GET, HEAD, POST; } diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java index b94bc30..b4f1940 100644 --- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java +++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java @@ -5,6 +5,7 @@ package org.torproject.descriptor; import java.time.LocalDate; import java.util.List; +import java.util.Optional; /** * Contains a sanitized web server access log file from a {@code torproject.org} @@ -61,5 +62,32 @@ public interface WebServerAccessLog extends LogDescriptor { @Override public List getUnrecognizedLines(); + public interface Line extends LogDescriptor.Line { + +/** Returns the IP address of the requesting host. */ +public String getIp(); + +/** Returns the HTTP method, e.g., GET. */ +public Method getMethod(); + +/** Returns the protocol and version, e.g., HTTP/1.1. */ +public String getProtocol(); + +/** Returns the requested resource. */ +public String getRequest(); + +/** Returns the size of the response in bytes, if available. */ +public Optional getSize(); + +/** Returns the final status code, e.g., 200. */ +public int getResponse(); + +/** Returns the date when the request was received. */ +public LocalDate getDate(); + +/** True, if this is a valid web server access log line. 
*/ +public boolean isValid(); + } + } diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java index f02b1d7..7b56528 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java @@ -10,12 +10,17 @@ import org.torproject.descriptor.internal.FileType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; import java.io.File; +import java.io.InputStreamReader; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.Collection; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; /** * Implementation of web server access log descriptors. @@ -126,5 +131,20 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl return this.logDate; } + /** Returns a list of all valid log lines. */ + @Override + public List logLines() + throws DescriptorParseException { +try (BufferedReader br += new BufferedReader(new InputStreamReader(new ByteArrayInputStream( +this.getRawDescriptorBytes() { + return br.lines().map(line +
[tor-commits] [metrics-lib/master] Prepare for 2.2.0 release, again.
commit 72f62fbe48d7cbb2de6d7fd5fd38b867fb0b185e Author: Karsten LoesingDate: Mon Feb 26 14:12:31 2018 +0100 Prepare for 2.2.0 release, again. --- CERT | 20 ++-- CHANGELOG.md | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CERT b/CERT index 0f90f7a..43be56e 100644 --- a/CERT +++ b/CERT @@ -1,8 +1,8 @@ -BEGIN CERTIFICATE- -MIIDaTCCAlGgAwIBAgIEM/uNRzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV +MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU -b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw -NTE2MTQ0NDAwWhcNMTcwODE0MTQ0NDAwWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE +b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw +MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU @@ -11,11 +11,11 @@ Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud -DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEACxUk -6eZkayjJGZjw02bWjrkwwe/N5iOnOz9XcpGQY5lUEk12uIYCYHDB66SsZngsbwNQ -qZ8T6Avm9XSrKAKMxBcPsjTUy0Fb7yG3jLRypPlbgoe61YoliYYaWiaV1OQY1nsZ -ujt6f7T5bwsLeh7La7+kB1SGDu+hKH7Bi3RtVN09iL7x+BJVuJOKHKYoK+UO2fB3 -ltFenkGpiDzGQQxJRzA1WbCQsSPrE6Qi3NmWXsTAc+24Y9gTO8qlmfi6zp1W7TGO -+cUmHIO0u3+ewkb8oIj8jjzGTmEbuJrwgBOohQr6CXCicQHhIrBF6Opi7mF/UHSO -J8d+DLvT41EL9JrgCg== +DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU +qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x +RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq +MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO 
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG +qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW +2DiJiA8BLxZToe2XDA== -END CERTIFICATE- diff --git a/CHANGELOG.md b/CHANGELOG.md index 91f00b8..4d87ba4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# Changes in version 2.2.0 - 2018-01-31 +# Changes in version 2.2.0 - 2018-02-26 * Major changes - Add new descriptor type WebServerAccessLog to parse web server ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [metrics-lib/master] Bump version to 2.2.0-dev.
commit 7648780844f809f3068d3e625973fe43c78ccbb3 Author: Karsten LoesingDate: Mon Feb 26 16:24:25 2018 +0100 Bump version to 2.2.0-dev. --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 3f081f7..199ba7a 100644 --- a/build.xml +++ b/build.xml @@ -6,7 +6,7 @@ - + ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/master] Bump version to 1.5.0-dev.
commit ef1dfb6d32d5bb42a95052cd44790b6c586f38c9 Author: Karsten LoesingDate: Mon Feb 26 16:24:49 2018 +0100 Bump version to 1.5.0-dev. --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index ff8302b..fb219fa 100644 --- a/build.xml +++ b/build.xml @@ -8,7 +8,7 @@ - + ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/master] Prepare for 1.5.0 release.
commit ddfa7bad243dc8e0a3105b14418794c87786e88f Author: Karsten LoesingDate: Mon Feb 26 14:23:05 2018 +0100 Prepare for 1.5.0 release. --- CERT | 20 ++-- CHANGELOG.md | 2 +- build.xml| 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/CERT b/CERT index d94c0c3..43be56e 100644 --- a/CERT +++ b/CERT @@ -1,8 +1,8 @@ -BEGIN CERTIFICATE- -MIIDaTCCAlGgAwIBAgIEZTniETANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV +MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU -b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw -ODE3MTg1MDQ0WhcNMTcxMTE1MTg1MDQ0WjBlMQswCQYDVQQGEwJVUzELMAkGA1UE +b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw +MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU @@ -11,11 +11,11 @@ Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud -DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAbsAc -gwl5KJH3pVKw4b+ACCOMgW+27MisCFbT1Izq2Wx+JcLMt3N//MoIpYOZWhsIeazW -/NE0fNbkLi0IYA0F1nUC9pHl44Hd8Gjfqa/YQUi9ALtgsY7l6W0sceW8WnZ8bu8J -DfrqnmB0bD2xc9ZjOn58al8dVjVWs95M87D9WCRU6LiaKFj5c45wciABQsTmC0qD -pyHYOaSGtXxXKDw5pAntdtHkCbowV5tDi/QQ8Tg7i5O7xwSh71Q7TZiNFMpLomBL -QllHfTZryFmoHyGn5MfngBUVCVHig5nXmk0dUMGuLiK4789dkgiPRz0vpB5Yf8Yy -CCE2jB6VBi2g5fMx0w== +DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU +qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x +RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq +MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO 
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG +qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW +2DiJiA8BLxZToe2XDA== -END CERTIFICATE- diff --git a/CHANGELOG.md b/CHANGELOG.md index a0b5d1f..eb14839 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# Changes in version 1.5.0 - 2018-01-31 +# Changes in version 1.5.0 - 2018-02-26 * Major changes - Update to metrics-lib 2.2.0. diff --git a/build.xml b/build.xml index 48f6e33..ff8302b 100644 --- a/build.xml +++ b/build.xml @@ -8,7 +8,7 @@ - + ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [collector/master] Circumvent Collection (integer) size limit.
commit d05b4e4aee3bc15c3e4d5bac660dfcee5bc26279 Author: iwakehDate: Tue Feb 20 16:30:14 2018 + Circumvent Collection (integer) size limit. Clean log lines immediately when they are read and also make use of sanitized log's high redundancy immediately, i.e., continue with maps of >. Rename method(s) to reflect what they do. --- .../collector/webstats/SanitizeWeblogs.java| 89 -- 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 1f2e922..5a270dd 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -4,8 +4,10 @@ package org.torproject.collector.webstats; import static java.util.stream.Collectors.counting; +import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.groupingByConcurrent; -import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.reducing; +import static java.util.stream.Collectors.summingLong; import org.torproject.collector.conf.Configuration; import org.torproject.collector.conf.ConfigurationException; @@ -35,6 +37,8 @@ import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; @@ -111,35 +115,36 @@ public class SanitizeWeblogs extends CollecTorMain { : virtualEntry.getValue().entrySet()) { String physicalHost = physicalEntry.getKey(); log.info("Processing logs for {} on {}.", virtualHost, physicalHost); -Map linesByDate +Map > linesByDate = physicalEntry.getValue().values().stream().parallel() -.flatMap((LogMetadata metadata) -> lineStream(metadata) - .filter((line) -> line.isValid())).parallel() 
-.collect(groupingByConcurrent(WebServerAccessLogLine::getDate)); +.flatMap(metadata -> sanitzedLineStream(metadata).entrySet() +.stream()) +.collect(groupingBy(Map.Entry::getKey, + reducing(Collections.emptyMap(), Map.Entry::getValue, +(e1, e2) -> Stream.concat(e1.entrySet().stream(), e2.entrySet() + .stream()) + .collect(groupingByConcurrent(Map.Entry::getKey, + summingLong(Map.Entry::getValue)); LocalDate[] interval = determineInterval(linesByDate.keySet()); linesByDate.entrySet().stream() .filter((entry) -> entry.getKey().isAfter(interval[0]) && entry.getKey().isBefore(interval[1])).parallel() -.forEach((entry) -> storeSanitized(virtualHost, physicalHost, +.forEach((entry) -> storeSortedAndForget(virtualHost, physicalHost, entry.getKey(), entry.getValue())); } } } - private void storeSanitized(String virtualHost, String physicalHost, - LocalDate date, List lines) { + private void storeSortedAndForget(String virtualHost, String physicalHost, + LocalDate date, Map lineCounts) { String name = new StringJoiner(InternalLogDescriptor.SEP) .add(virtualHost).add(physicalHost) .add(InternalWebServerAccessLog.MARKER) .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)) .toString() + "." 
+ FileType.XZ.name().toLowerCase(); -log.debug("Sanitizing {}.", name); -Map retainedLines = new TreeMap<>(lines -.stream().parallel().map((line) -> sanitize(line, date)) -.filter((line) -> line.isPresent()) -.map((line) -> line.get()) -.collect(groupingByConcurrent(line -> line, counting(; -lines.clear(); // not needed anymore +log.debug("Storing {}.", name); +Map retainedLines = new TreeMap<>(lineCounts); +lineCounts.clear(); // not needed anymore try { WebServerAccessLogPersistence walp = new WebServerAccessLogPersistence( @@ -187,8 +192,8 @@ public class SanitizeWeblogs extends CollecTorMain { .collect(Collectors.joining("\n", "", "\n")).getBytes(); } - static Optional sanitize(WebServerAccessLogLine logLine, - LocalDate date) { + static Optional + sanitize(WebServerAccessLogLine logLine) { if (!logLine.isValid() || !(Method.GET == logLine.getMethod() || Method.HEAD == logLine.getMethod()) @@ -203,10 +208,13 @@ public class SanitizeWeblogs extends CollecTorMain { if (queryStart > 0) { logLine.setRequest(logLine.getRequest().substring(0, queryStart)); } -
[tor-commits] [collector/master] Reduce memory footprint and wall time.
commit 8557bf6255e6e3745088033e8e7bad7801421686 Author: iwakehDate: Tue Feb 20 16:30:09 2018 + Reduce memory footprint and wall time. Adapt to latest changes of metrics-lib (task-25329) and make use of the high redundancy of logs (e.g. a 3G file might only contain 350 different lines). This avoids OOM and array out of bounds exceptions for large files (>2G) and gives a speed-up of roughly 50%. (The earlier 66min are down to 34min for meronense files plus two larger files.) There is a BATCH constant, which could be tuned for processing speed. It is logged for each webstats module run. Currently, it is set to 100k. This was more or less arbitrarily chosen and used for all the tests. A test run using 500k didn't show significant differences. --- .../persist/WebServerAccessLogPersistence.java | 8 --- .../collector/webstats/SanitizeWeblogs.java| 61 ++ 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java index 792d3a9..dab4112 100644 --- a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java +++ b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java @@ -5,7 +5,6 @@ package org.torproject.collector.persist; import org.torproject.descriptor.WebServerAccessLog; import org.torproject.descriptor.internal.FileType; -import org.torproject.descriptor.log.InternalLogDescriptor; import org.torproject.descriptor.log.InternalWebServerAccessLog; import org.slf4j.Logger; @@ -30,13 +29,6 @@ public class WebServerAccessLogPersistence /** Prepare storing the given descriptor. */ public WebServerAccessLogPersistence(WebServerAccessLog desc) { super(desc, new byte[0]); -byte[] compressedBytes = null; -try { // The descriptor bytes have to be stored compressed. 
- compressedBytes = COMPRESSION.compress(desc.getRawDescriptorBytes()); - ((InternalLogDescriptor)desc).setRawDescriptorBytes(compressedBytes); -} catch (Exception ex) { - log.warn("Cannot compress '{}'. Storing uncompressed.", ex); -} calculatePaths(); } diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 7601898..1f2e922 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -3,6 +3,7 @@ package org.torproject.collector.webstats; +import static java.util.stream.Collectors.counting; import static java.util.stream.Collectors.groupingByConcurrent; import static java.util.stream.Collectors.toList; @@ -17,6 +18,7 @@ import org.torproject.collector.persist.WebServerAccessLogPersistence; import org.torproject.descriptor.DescriptorParseException; import org.torproject.descriptor.Method; import org.torproject.descriptor.WebServerAccessLog; +import org.torproject.descriptor.internal.FileType; import org.torproject.descriptor.log.InternalLogDescriptor; import org.torproject.descriptor.log.InternalWebServerAccessLog; import org.torproject.descriptor.log.WebServerAccessLogImpl; @@ -26,8 +28,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; -import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.InputStreamReader; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; @@ -40,6 +43,7 @@ import java.util.SortedSet; import java.util.StringJoiner; import java.util.TreeMap; import java.util.TreeSet; +import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -87,6 +91,7 @@ public class SanitizeWeblogs extends CollecTorMain { Set sources = this.config.getSourceTypeSet( Key.WebstatsSources); if 
(sources.contains(SourceType.Local)) { +log.info("Processing logs using batch value {}.", BATCH); findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins)); PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath)); } @@ -126,24 +131,60 @@ public class SanitizeWeblogs extends CollecTorMain { String name = new StringJoiner(InternalLogDescriptor.SEP) .add(virtualHost).add(physicalHost) .add(InternalWebServerAccessLog.MARKER) -.add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString(); +.add(date.format(DateTimeFormatter.BASIC_ISO_DATE)) +.toString() + "." + FileType.XZ.name().toLowerCase(); log.debug("Sanitizing {}.", name); -List retainedLines = lines +Map retainedLines = new TreeMap<>(lines .stream().parallel().map((line) ->
[tor-commits] [collector/master] Adapt CollecTor to latest metrics-lib master branch.
commit fbb35f75da022a23912b937b1825d8f216abad07 Author: iwakeh Date: Tue Feb 20 16:30:08 2018 + Adapt CollecTor to latest metrics-lib master branch. --- src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java index 53ad118..7601898 100644 --- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java @@ -15,10 +15,10 @@ import org.torproject.collector.cron.CollecTorMain; import org.torproject.collector.persist.PersistenceUtils; import org.torproject.collector.persist.WebServerAccessLogPersistence; import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.Method; import org.torproject.descriptor.WebServerAccessLog; import org.torproject.descriptor.log.InternalLogDescriptor; import org.torproject.descriptor.log.InternalWebServerAccessLog; -import org.torproject.descriptor.log.Method; import org.torproject.descriptor.log.WebServerAccessLogImpl; import org.torproject.descriptor.log.WebServerAccessLogLine; ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits
[tor-commits] [metrics-lib/master] Accommodate logs with more than Integer.MAX_VALUE lines.
commit 35feb816f81f26bcc9dc035a1aaf496c34a86647 Author: iwakeh Date: Fri Feb 16 09:05:46 2018 + Accommodate logs with more than Integer.MAX_VALUE lines. Implements task-23046. --- .../org/torproject/descriptor/LogDescriptor.java | 10 +-- .../torproject/descriptor/WebServerAccessLog.java | 6 .../descriptor/log/WebServerAccessLogImpl.java | 32 ++ .../descriptor/log/LogDescriptorTest.java | 5 +++- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java index 826fcda..8dd8460 100644 --- a/src/main/java/org/torproject/descriptor/LogDescriptor.java +++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java @@ -5,6 +5,7 @@ package org.torproject.descriptor; import java.io.InputStream; import java.util.List; +import java.util.stream.Stream; /** * Contains a log file. @@ -64,11 +65,14 @@ public interface LogDescriptor extends Descriptor { public List getUnrecognizedLines(); /** - * Returns a list of all parseable log lines. - * Might require a lot of memory depending on log size. + * Returns a stream of all parseable log lines. + * Depending on log size this might not fit into a collection type. + * + * @since 2.2.0 */ - public List logLines() throws DescriptorParseException; + public Stream logLines() throws DescriptorParseException; + /** Base interface for accessing log lines. */ public interface Line { /** Returns a log line string. 
*/ diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java index b4f1940..5f3ad73 100644 --- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java +++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java @@ -62,6 +62,12 @@ public interface WebServerAccessLog extends LogDescriptor { @Override public List getUnrecognizedLines(); + /** + * Facilitates access to all log line fields that don't only contain + * default values post sanitization. + * + * @since 2.2.0 + */ public interface Line extends LogDescriptor.Line { /** Returns the IP address of the requesting host. */ diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java index e48a262..3666d5d 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java @@ -15,10 +15,11 @@ import java.io.File; import java.io.InputStreamReader; import java.time.LocalDate; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Implementation of web server access log descriptors. @@ -128,15 +129,34 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl return this.logDate; } - /** Returns a list of all valid log lines. */ + private static final int LISTLIMIT = Integer.MAX_VALUE / 2; + + /** Returns a stream of all valid log lines. 
*/ @Override - public List logLines() + public Stream logLines() throws DescriptorParseException { try (BufferedReader br = new BufferedReader(new InputStreamReader( this.decompressedByteStream( { - return br.lines().map(line - -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line)) -.filter(line -> line.isValid()).collect(Collectors.toList()); + List lists = new ArrayList<>(); + List currentList = new ArrayList<>(); + lists.add(currentList); + String lineStr = br.readLine(); + int count = 0; + while (null != lineStr) { +WebServerAccessLogLine wsal = WebServerAccessLogLine.makeLine(lineStr); +if (wsal.isValid()) { + currentList.add(wsal); + count++; +} +if (count >= LISTLIMIT) { + currentList = new ArrayList<>(); + lists.add(currentList); + count = 0; +} +lineStr = br.readLine(); + } + br.close(); + return lists.stream().flatMap(list -> list.stream()); } catch (Exception ex) { throw new DescriptorParseException("Cannot retrieve log lines.", ex); } diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java index 67ba638..0ff3e62 100644 --- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java +++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java @@ -3,6 +3,8 @@ package org.torproject.descriptor.log; +import static java.util.stream.Collectors.toList;
[tor-commits] [metrics-lib/master] Enable handling of larger (> 2G) log files.
commit c01dfbdc4e5d817fb10fb45f79d8223853cdeac0 Author: iwakehDate: Fri Feb 16 09:05:46 2018 + Enable handling of larger (> 2G) log files. As log files can be compressed very efficiently log descriptor raw bytes contain the compressed bytes. Added methods for accessing uncompressed log content, as well as stream based methods for decompression and compression in class FileType. Adapted all tests to the changes. Implements task-25329. --- .../org/torproject/descriptor/LogDescriptor.java | 26 -- .../torproject/descriptor/internal/FileType.java | 18 ++ .../descriptor/log/LogDescriptorImpl.java | 25 -- .../descriptor/log/WebServerAccessLogImpl.java | 12 +++ .../descriptor/log/LogDescriptorTest.java | 40 +++--- .../descriptor/log/WebServerModuleTest.java| 4 ++- 6 files changed, 82 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java index 6a6bf84..826fcda 100644 --- a/src/main/java/org/torproject/descriptor/LogDescriptor.java +++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java @@ -3,21 +3,34 @@ package org.torproject.descriptor; +import java.io.InputStream; import java.util.List; /** * Contains a log file. * * Unlike other descriptors, logs can get very large and are typically stored - * on disk in compressed form. However, all access to log contents through this - * interface and its subinterfaces is made available in uncompressed form. + * on disk in compressed form. Access to log contents through this + * interface and its subinterfaces is made available in compressed and + * decompressed form: + * + * The raw descriptor bytes are compressed, because logs contain + * often redundant information that can achieve high compression rates. + * For example, a 500kB compressed log file might be deflated to 3GB. + * The uncompressed log contents can be accessed as a stream of bytes. + * A list of log lines (decompressed) can be retrieved. 
+ * + * * * @since 2.2.0 */ public interface LogDescriptor extends Descriptor { /** - * Returns the decompressed raw descriptor bytes of the log. + * Returns the raw compressed descriptor bytes of the log. + * + * For access to the log's decompressed bytes of + * use method {@code decompressedByteStream}. * * @since 2.2.0 */ @@ -25,6 +38,13 @@ public interface LogDescriptor extends Descriptor { public byte[] getRawDescriptorBytes(); /** + * Returns the decompressed raw descriptor bytes of the log as stream. + * + * @since 2.2.0 + */ + public InputStream decompressedByteStream() throws DescriptorParseException; + + /** * Returns annotations found in the log file, which may be an empty List if a * log format does not support adding annotations. * diff --git a/src/main/java/org/torproject/descriptor/internal/FileType.java b/src/main/java/org/torproject/descriptor/internal/FileType.java index 353f0bb..2c07df6 100644 --- a/src/main/java/org/torproject/descriptor/internal/FileType.java +++ b/src/main/java/org/torproject/descriptor/internal/FileType.java @@ -93,6 +93,24 @@ public enum FileType { } /** + * Compresses the given InputStream and returns an OutputStream. + * + * @since 2.2.0 + */ + public OutputStream compress(OutputStream os) throws Exception { +return this.outputStream(os); + } + + /** + * Decompresses the given InputStream and returns an OutputStream. + * + * @since 2.2.0 + */ + public InputStream decompress(InputStream is) throws Exception { +return this.inputStream(is); + } + + /** * Decompresses the given bytes in memory and returns the decompressed bytes. 
* * @since 2.2.0 diff --git a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java index 97854e4..3583d26 100644 --- a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java +++ b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java @@ -14,10 +14,10 @@ import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; +import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.regex.Matcher; @@ -76,8 +76,7 @@ public abstract class LogDescriptorImpl this.fileType = FileType.findType(mat.group(1).toUpperCase()); if (FileType.PLAIN == this.fileType) { this.fileType = defaultCompression; - } else { -this.logBytes = this.fileType.decompress(this.logBytes); +this.logBytes = this.fileType.compress(this.logBytes); } } catch
[tor-commits] [tor-browser-build/master] Bug 25336: Bump obfs4proxy to 0.0.7
commit 4bed9a85478b6fb16e0d654589d8cb8ed3865027 Author: Georg Koppen Date: Mon Feb 26 08:26:20 2018 + Bug 25336: Bump obfs4proxy to 0.0.7 0.0.7 has long been out and OnionShare needs its meek_lite feature for macOS. Let's update to this latest stable obfs4 version then. --- projects/obfs4/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/obfs4/config b/projects/obfs4/config index 44db79d..916887f 100644 --- a/projects/obfs4/config +++ b/projects/obfs4/config @@ -1,5 +1,5 @@ # vim: filetype=yaml sw=2 -version: 0.0.5 +version: 0.0.7 git_url: https://git.torproject.org/pluggable-transports/obfs4.git git_hash: 'obfs4proxy-[% c("version") %]' tag_gpg_id: 1 ___ tor-commits mailing list tor-commits@lists.torproject.org https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits