[tor-commits] [torbirdy/master] Ensure compatibility for Thunderbird 59 (use proper imports)

2018-02-26 Thread sukhbir
commit b8e45f597969aa592e23c5f8dd34dfd397e9837f
Author: Sukhbir Singh 
Date:   Mon Feb 26 14:39:03 2018 -0500

Ensure compatibility for Thunderbird 59 (use proper imports)
---
 components/torbirdy.js | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/components/torbirdy.js b/components/torbirdy.js
index 4dafef9..430c15f 100644
--- a/components/torbirdy.js
+++ b/components/torbirdy.js
@@ -1,7 +1,7 @@
 var { interfaces: Ci, utils: Cu, classes: Cc } = Components;
 
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
-Cu.import("resource://gre/modules/imXPCOMUtils.jsm");
+Cu.import("resource:///modules/imXPCOMUtils.jsm");
 Cu.import("resource://gre/modules/AddonManager.jsm");
 Cu.import("resource://gre/modules/Preferences.jsm");
 
@@ -439,7 +439,6 @@ function TorBirdy() {
   this.setPrefs();
   sanitizeDateHeaders();
 
-  dump("TorBirdy registered!\n");
 }
 
 TorBirdy.prototype = {
@@ -471,7 +470,6 @@ TorBirdy.prototype = {
   onUninstalling: function(addon, needsRestart) {
 this.onStateChange();
 if (addon.id == TB_ID) {
-  dump("TorBirdy uninstall requested\n");
   this._uninstall = true;
   this.resetUserPrefs();
 }
@@ -480,7 +478,6 @@ TorBirdy.prototype = {
   onOperationCancelled: function(addon) {
 this.onStateChange();
 if (addon.id == TB_ID) {
-  dump("Uninstall requested cancelled\n");
   this._uninstall = false;
   this.setPrefs();
 }
@@ -491,14 +488,12 @@ TorBirdy.prototype = {
   },
 
   resetUserPrefs: function() {
-dump("Resetting user preferences to default\n");
 // Clear the Thunderbird preferences we changed.
 for (let each in TorBirdyPrefs) {
   this.prefs.clearUserPref(each);
 }
 
 // Restore the older proxy preferences that were set prior to TorBirdy's install.
-dump("Restoring proxy settings\n");
 for (let i = 0; i < TorBirdyOldPrefs.length; i++) {
   var oldPref = TorBirdyOldPrefs[i];
   var setValue = kRestoreBranch + oldPref;



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [torbirdy/master] Add locale for en-US (default English locale)

2018-02-26 Thread sukhbir
commit 39bd34681a709db42ad52c16d70daa0cd0f52644
Author: Sukhbir Singh 
Date:   Mon Feb 26 14:47:33 2018 -0500

Add locale for en-US (default English locale)
---
 chrome/locale/en-US/torbirdy.dtd| 60 +
 chrome/locale/en-US/torbirdy.properties | 18 ++
 2 files changed, 78 insertions(+)

diff --git a/chrome/locale/en-US/torbirdy.dtd b/chrome/locale/en-US/torbirdy.dtd
new file mode 100644
index 0000000..c88803a
--- /dev/null
+++ b/chrome/locale/en-US/torbirdy.dtd
@@ -0,0 +1,60 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/chrome/locale/en-US/torbirdy.properties b/chrome/locale/en-US/torbirdy.properties
new file mode 100644
index 0000000..4f645eb
--- /dev/null
+++ b/chrome/locale/en-US/torbirdy.properties
@@ -0,0 +1,18 @@
+torbirdy.name=TorBirdy
+
+torbirdy.enabled.tor=TorBirdy Enabled:Tor
+torbirdy.enabled.jondo=TorBirdy Enabled:JonDo
+torbirdy.enabled.custom=TorBirdy Enabled:Custom Proxy
+torbirdy.enabled.torification=TorBirdy Enabled:Transparent Torification
+torbirdy.enabled.whonix=TorBirdy Enabled:Whonix
+torbirdy.disabled=TorBirdy:Disabled!
+torbirdy.enabled=TorBirdy:Enabled
+
+torbirdy.email.prompt=TorBirdy has disabled Thunderbird's auto-configuration wizard to protect your anonymity.\n\nThe recommended security settings for %S have been set.\n\nYou can now configure the other account settings manually.
+
+torbirdy.email.advanced=Please note that changing the advanced settings of TorBirdy is NOT recommended.\n\nYou should only continue if you are sure of what you are doing.
+torbirdy.email.advanced.nextwarning=Show this warning next time
+torbirdy.email.advanced.title=TorBirdy Advanced Settings
+
+torbirdy.firstrun=You are now running TorBirdy.\n\nTo help protect your anonymity, TorBirdy will enforce the Thunderbird settings it has set, preventing them from being changed by you or by any add-on. There are some settings that can be changed and those are accessed through TorBirdy's preferences dialog. When TorBirdy is uninstalled or disabled, all settings that it changes are reset to their default values (the values prior to TorBirdy's install).\n\nIf you are a new user, it is recommended that you read through the TorBirdy website to understand what we are trying to accomplish with TorBirdy for our users.
+torbirdy.website=https://trac.torproject.org/projects/tor/wiki/torbirdy



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [torbirdy/master] Drop support for versioned JavaScript (mozilla57; see bugzilla 1390106)

2018-02-26 Thread sukhbir
commit c9d9fa56d8f9eac0c5af0dfebf2d8857ff45e19a
Author: Sukhbir Singh 
Date:   Mon Feb 26 14:41:41 2018 -0500

Drop support for versioned JavaScript (mozilla57; see bugzilla 1390106)
---
 chrome/content/accountpref.xul  | 2 +-
 chrome/content/firstruninfo.xul | 2 +-
 chrome/content/preferences.xul  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/chrome/content/accountpref.xul b/chrome/content/accountpref.xul
index baf419e..f460f64 100644
--- a/chrome/content/accountpref.xul
+++ b/chrome/content/accountpref.xul
@@ -14,7 +14,7 @@
 onload="return org.torbirdy.accountprefs.onLoad();"
 ondialogaccept="return org.torbirdy.accountprefs.onAccept();">
 
-   
 

diff --git a/chrome/content/firstruninfo.xul b/chrome/content/firstruninfo.xul
index 43f1d66..17fd73a 100644
--- a/chrome/content/firstruninfo.xul
+++ b/chrome/content/firstruninfo.xul
@@ -10,7 +10,7 @@
 onload="return org.torbirdy.firstrun.onLoad();"
 ondialogaccept="window.close();">
 
-   
 

diff --git a/chrome/content/preferences.xul b/chrome/content/preferences.xul
index 2e3c168..296f220 100644
--- a/chrome/content/preferences.xul
+++ b/chrome/content/preferences.xul
@@ -19,7 +19,7 @@
 ondialogaccept="return org.torbirdy.prefs.onAccept();"
 onload="return org.torbirdy.prefs.onLoad();">
 
-  
 
   



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [torbirdy/master] Update install.rdf

2018-02-26 Thread sukhbir
commit baccc82cc942daeaaeb369e885e1c7b6df40b690
Author: Sukhbir Singh 
Date:   Mon Feb 26 14:45:41 2018 -0500

Update install.rdf

- Bump version to 0.2.4
- Set minVersion to 52.0 and maxVersion to 59.*
---
 install.rdf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/install.rdf b/install.rdf
index fefb89f..86d94b1 100644
--- a/install.rdf
+++ b/install.rdf
@@ -4,7 +4,7 @@
  xmlns:em="http://www.mozilla.org/2004/em-rdf#">
   
 castironthunderbirdc...@torproject.org
-0.2.3
+0.2.4
 2
 
chrome://castironthunderbirdclub/skin/images/tor.png
 
chrome://castironthunderbirdclub/content/preferences.xul
@@ -13,8 +13,8 @@
 
 
   {3550f703-e582-4d05-9a08-453d09bdfdc6}
-  45.0
-  52.*
+  52.0
+  59.*
 
 
 



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [torbirdy/master] Update the ChangeLog

2018-02-26 Thread sukhbir
commit 38a66b01fecc11d188fffafdaf0ec32ddad3db77
Author: Sukhbir Singh 
Date:   Mon Feb 26 14:48:35 2018 -0500

Update the ChangeLog
---
 ChangeLog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ChangeLog b/ChangeLog
index 7d6869d..d95c5d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,6 @@
 0.2.4, 
  * Update translations, adding all supported languages from Transifex
+ * Added support for Thunderbird 58 and Thunderbird 59
 
 0.2.3, 04 Aug 2017
  * Bug 21880: Enable encrypted email headers for Enigmail (Memory Hole)

___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [torbirdy/master] Update the changelog

2018-02-26 Thread sukhbir
commit 30f59fa5df0c6bb22b024ba6e33dfbe16a1ac935
Author: Sukhbir Singh 
Date:   Tue Feb 13 12:38:59 2018 -0500

Update the changelog
---
 ChangeLog | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index fcf7a26..7d6869d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,6 @@
+0.2.4, 
+ * Update translations, adding all supported languages from Transifex
+
 0.2.3, 04 Aug 2017
  * Bug 21880: Enable encrypted email headers for Enigmail (Memory Hole)
  * Bug 22569: Update Enigmail values for custom proxy settings

___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Use Java8 idiom for toString method.

2018-02-26 Thread karsten
commit 2457eb5be72d508c4ec4e2d2c3b6f7a88c69ed4c
Author: iwakeh 
Date:   Fri Oct 27 17:35:15 2017 +0000

Use Java8 idiom for toString method.
---
 .../org/torproject/collector/bridgedescs/DescriptorBuilder.java   | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java 
b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index cc57662..4ca9dd1 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -5,6 +5,7 @@ package org.torproject.collector.bridgedescs;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.stream.Collectors;
 
 /** Builder for descriptors.
  *
@@ -55,10 +56,7 @@ class DescriptorBuilder {
 
   @Override
   public String toString() {
-StringBuilder full = new StringBuilder();
-for (Object part : this.parts) {
-  full.append(part.toString());
-}
-return full.toString();
+return this.parts.stream().map(part -> part.toString())
+.collect(Collectors.joining(""));
   }
 }



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Adapt CollecTor to latest metrics-lib master branch.

2018-02-26 Thread karsten
commit fbb35f75da022a23912b937b1825d8f216abad07
Author: iwakeh 
Date:   Tue Feb 20 16:30:08 2018 +0000

Adapt CollecTor to latest metrics-lib master branch.
---
 src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 53ad118..7601898 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -15,10 +15,10 @@ import org.torproject.collector.cron.CollecTorMain;
 import org.torproject.collector.persist.PersistenceUtils;
 import org.torproject.collector.persist.WebServerAccessLogPersistence;
 import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.Method;
 import org.torproject.descriptor.WebServerAccessLog;
 import org.torproject.descriptor.log.InternalLogDescriptor;
 import org.torproject.descriptor.log.InternalWebServerAccessLog;
-import org.torproject.descriptor.log.Method;
 import org.torproject.descriptor.log.WebServerAccessLogImpl;
 import org.torproject.descriptor.log.WebServerAccessLogLine;
 



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Make logging statements comply to Metrics' standards.

2018-02-26 Thread karsten
commit 43cd15876635d763d0f6adbf6bcc5c7df6380406
Author: iwakeh 
Date:   Fri Oct 27 17:35:18 2017 +0000

Make logging statements comply to Metrics' standards.

Also edit here and there for more readability and less lines.
---
 .../bridgedescs/SanitizedBridgesWriter.java| 145 +
 1 file changed, 62 insertions(+), 83 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index b4cd49e..22bf8f7 100644
--- 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -167,9 +167,9 @@ public class SanitizedBridgesWriter extends CollecTorMain {
   && line.length() != ("yyyy-MM,".length() + 83 * 2))
   || parts.length != 2) {
 logger.warn("Invalid line in bridge-ip-secrets file "
-+ "starting with '" + line.substring(0, 7) + "'! "
++ "starting with '{}'! "
 + "Not calculating any IP address hashes in this "
-+ "execution!");
++ "execution!", line.substring(0, 7));
 this.persistenceProblemWithSecrets = true;
 break;
   }
@@ -178,19 +178,16 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
   this.secretsForHashingIpAddresses.put(month, secret);
 }
 if (!this.persistenceProblemWithSecrets) {
-  logger.debug("Read "
-  + this.secretsForHashingIpAddresses.size() + " secrets for "
-  + "hashing bridge IP addresses.");
+  logger.debug("Read {} secrets for hashing bridge IP addresses.",
+  this.secretsForHashingIpAddresses.size());
 }
   } catch (DecoderException e) {
-logger.warn("Failed to decode hex string in "
-+ this.bridgeIpSecretsFile + "! Not calculating any IP "
-+ "address hashes in this execution!", e);
+logger.warn("Failed to decode hex string in {}! Not calculating any IP "
++ "address hashes in this execution!", this.bridgeIpSecretsFile, e);
 this.persistenceProblemWithSecrets = true;
   } catch (IOException e) {
-logger.warn("Failed to read "
-+ this.bridgeIpSecretsFile + "! Not calculating any IP "
-+ "address hashes in this execution!", e);
+logger.warn("Failed to read {}! Not calculating any IP "
++ "address hashes in this execution!", this.bridgeIpSecretsFile, e);
 this.persistenceProblemWithSecrets = true;
   }
 }
@@ -490,8 +487,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 } else if (line.startsWith("fingerprint ")) {
   if (!("fingerprint " + authorityFingerprint).equals(line)) {
 logger.warn("Mismatch between authority fingerprint expected from "
-+ "file name (" + authorityFingerprint + ") and parsed from "
-+ "\"fingerprint\" line (\"" + line + "\").");
++ "file name ({}) and parsed from \"fingerprint\" "
++ "line (\"{}\").", authorityFingerprint, line);
 return;
   }
   header.append(line).newLine();
@@ -511,13 +508,13 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
   /* Parse the relevant parts of this r line. */
   String[] parts = line.split(" ");
   if (parts.length < 9) {
-logger.warn("Illegal line '" + line + "' in bridge network "
-+ "status.  Skipping descriptor.");
+logger.warn("Illegal line '{}' in bridge network "
++ "status.  Skipping descriptor.", line);
 return;
   }
   if (!Base64.isBase64(parts[2])) {
-logger.warn("Illegal base64 character in r line '" + parts[2]
-+ "'.  Skipping descriptor.");
+logger.warn("Illegal base64 character in r line '{}'.  "
++ "Skipping descriptor.", parts[2]);
 return;
   }
   fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
@@ -567,8 +564,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
   if (scrubbedOrAddress != null) {
 scrubbed.append("a " + scrubbedOrAddress + "\n");
   } else {
-logger.warn("Invalid address in line '" + line
-+ "' in bridge network status.  Skipping line!");
+logger.warn("Invalid address in line '{}' "
++ "in bridge network status.  Skipping line!", line);
   }
 
 /* Nothing special about s, w, and p lines; just copy them. */
@@ -581,8 +578,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
  * network status.  If there is, we should probably learn before
 

[tor-commits] [collector/release] Separate parsing and sanitizing steps for bridge descriptors.

2018-02-26 Thread karsten
commit d5aba97f9b6c4ee74735b183552b8435e5e0661b
Author: Karsten Loesing 
Date:   Fri Oct 27 19:26:57 2017 +0200

Separate parsing and sanitizing steps for bridge descriptors.

First step towards implementing #20549.
---
 .../SanitizedBridgeDescriptorBuilder.java  |  54 +
 .../bridgedescs/SanitizedBridgesWriter.java| 240 +
 2 files changed, 161 insertions(+), 133 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
new file mode 100644
index 0000000..174a5ae
--- /dev/null
+++ 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
@@ -0,0 +1,54 @@
+package org.torproject.collector.bridgedescs;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Builder for sanitized bridge descriptors.
+ *
+ * This builder class can be used while parsing and sanitizing an original
+ * bridge descriptor. It accepts already sanitized {@code String}s and
+ * {@code StringBuilder}s as placeholders for parts that can only be sanitized
+ * after finishing the parsing step. */
+class SanitizedBridgeDescriptorBuilder {
+
+  private List<StringBuilder> descriptorParts;
+
+  private StringBuilder lastDescriptorPart;
+
+  SanitizedBridgeDescriptorBuilder() {
+this.descriptorParts = new ArrayList<>();
+this.lastDescriptorPart = new StringBuilder();
+this.descriptorParts.add(this.lastDescriptorPart);
+  }
+
+  SanitizedBridgeDescriptorBuilder append(String sanitizedString) {
+this.lastDescriptorPart.append(sanitizedString);
+return this;
+  }
+
+  SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) {
+this.descriptorParts.add(placeholder);
+this.lastDescriptorPart = new StringBuilder();
+this.descriptorParts.add(this.lastDescriptorPart);
+return this;
+  }
+
+  SanitizedBridgeDescriptorBuilder space() {
+this.lastDescriptorPart.append(' ');
+return this;
+  }
+
+  SanitizedBridgeDescriptorBuilder newLine() {
+this.lastDescriptorPart.append('\n');
+return this;
+  }
+
+  @Override
+  public String toString() {
+StringBuilder fullDescriptor = new StringBuilder();
+for (StringBuilder descriptorPart : this.descriptorParts) {
+  fullDescriptor.append(descriptorPart);
+}
+return fullDescriptor.toString();
+  }
+}
diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index e257245..1ef1d60 100644
--- 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -34,6 +34,7 @@ import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
@@ -671,23 +672,20 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
 }
 
 /* Parse descriptor to generate a sanitized version. */
-String scrubbedDesc = null;
+String address = null;
 String published = null;
+byte[] fingerprintBytes = null;
+StringBuilder scrubbedAddress = null;
+Map scrubbedTcpPorts = new HashMap<>();
+Map scrubbedIpAddressesAndTcpPorts = new 
HashMap<>();
 String masterKeyEd25519FromIdentityEd25519 = null;
-try {
-  BufferedReader br = new BufferedReader(new StringReader(
-  new String(data, "US-ASCII")));
-  StringBuilder scrubbed = new StringBuilder();
-  String line = null;
-  byte[] fingerprintBytes = null;
-  String hashedBridgeIdentity = null;
-  String address = null;
-  String routerLine = null;
-  String scrubbedRouterLine = null;
-  String scrubbedAddress = null;
+SanitizedBridgeDescriptorBuilder scrubbed =
+new SanitizedBridgeDescriptorBuilder();
+try (BufferedReader br = new BufferedReader(new StringReader(
+new String(data, "US-ASCII")))) {
+  scrubbed.append(Annotation.BridgeServer.toString());
+  String line;
   String masterKeyEd25519 = null;
-  List orAddresses = null;
-  List scrubbedOrAddresses = null;
   boolean skipCrypto = false;
   while ((line = br.readLine()) != null) {
 
@@ -706,15 +704,26 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
 return;
   }
   address = parts[2];
-  routerLine = line;
-
-/* Store or-address parts in a list and sanitize them when we have
- * read the fingerprint. */
+  scrubbedAddress = new StringBuilder();
+  StringBuilder scrubbedOrPort = new StringBuilder();
+  scrubbedTcpPorts.put(scrubbedOrPort, parts[3]);
+   

[tor-commits] [collector/release] Add hasContent method to make even more use of DescriptorBuilder.

2018-02-26 Thread karsten
commit 5b68aaf8aa7c5f3769544061344e75f7884e87ef
Author: iwakeh 
Date:   Fri Oct 27 17:35:19 2017 +0000

Add hasContent method to make even more use of DescriptorBuilder.
---
 .../collector/bridgedescs/DescriptorBuilder.java   |  4 +++
 .../bridgedescs/SanitizedBridgesWriter.java| 42 +++---
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java 
b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index f530368..9d23adf 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -80,6 +80,10 @@ class DescriptorBuilder {
 return this;
   }
 
+  public boolean hasContent() {
+return this.parts.size() > 1 || lastPart.length() > 0;
+  }
+
   @Override
   public String toString() {
 if (!this.finalized) {
diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index 22bf8f7..af54e03 100644
--- 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -462,7 +462,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 boolean includesFingerprintLine = false;
 SortedMap scrubbedLines = new TreeMap<>();
 try {
-  StringBuilder scrubbed = new StringBuilder();
+  DescriptorBuilder scrubbed = new DescriptorBuilder();
   BufferedReader br = new BufferedReader(new StringReader(new String(
   data, "US-ASCII")));
   String line = null;
@@ -499,10 +499,10 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
 } else if (line.startsWith("r ")) {
 
   /* Clear buffer from previously scrubbed lines. */
-  if (scrubbed.length() > 0) {
+  if (scrubbed.hasContent()) {
 String scrubbedLine = scrubbed.toString();
 scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
-scrubbed = new StringBuilder();
+scrubbed = new DescriptorBuilder();
   }
 
   /* Parse the relevant parts of this r line. */
@@ -549,11 +549,13 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
   fingerprintBytes, descPublicationTime);
   String scrubbedDirPort = this.scrubTcpPort(dirPort,
   fingerprintBytes, descPublicationTime);
-  scrubbed.append("r " + nickname + " "
-  + hashedBridgeIdentityBase64 + " "
-  + hashedDescriptorIdentifier + " " + descPublicationTime
-  + " " + scrubbedAddress + " " + scrubbedOrPort + " "
-  + scrubbedDirPort + "\n");
+  scrubbed.append("r ").append(nickname).space()
+  .append(hashedBridgeIdentityBase64).space()
+  .append(hashedDescriptorIdentifier).space()
+  .append(descPublicationTime).space()
+  .append(scrubbedAddress).space()
+  .append(scrubbedOrPort).space()
+  .append(scrubbedDirPort).newLine();
 
 /* Sanitize any addresses in a lines using the fingerprint and
  * descriptor publication time from the previous r line. */
@@ -562,7 +564,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
   line.substring("a ".length()), fingerprintBytes,
   descPublicationTime);
   if (scrubbedOrAddress != null) {
-scrubbed.append("a " + scrubbedOrAddress + "\n");
+scrubbed.append("a ").append(scrubbedOrAddress).newLine();
   } else {
 logger.warn("Invalid address in line '{}' "
 + "in bridge network status.  Skipping line!", line);
@@ -572,7 +574,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 } else if (line.startsWith("s ") || line.equals("s")
 || line.startsWith("w ") || line.equals("w")
 || line.startsWith("p ") || line.equals("p")) {
-  scrubbed.append(line + "\n");
+  scrubbed.append(line).newLine();
 
 /* There should be nothing else but r, a, w, p, and s lines in the
  * network status.  If there is, we should probably learn before
@@ -584,10 +586,10 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
 }
   }
   br.close();
-  if (scrubbed.length() > 0) {
+  if (scrubbed.hasContent()) {
 String scrubbedLine = scrubbed.toString();
 scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
-scrubbed = new StringBuilder();
+scrubbed = new DescriptorBuilder();
   }
   if (!includesFingerprintLine) {
 header.append("fingerprint ").append(authorityFingerprint).newLine();
@@ -1119,7 +1121,7 @@ public 

[tor-commits] [collector/release] Rename SanitizedBridgeDescriptorBuilder to DescriptorBuilder.

2018-02-26 Thread karsten
commit 266051f3397bb0f676054ce2459502f680bfab6d
Author: iwakeh 
Date:   Fri Oct 27 17:35:10 2017 +0000

Rename SanitizedBridgeDescriptorBuilder to DescriptorBuilder.

The class doesn't 'know' about descriptor sanitization, it is only a sort of
container for writing descriptors.  It could be actually moved to some util
package and used in other parsing steps, too.

Also rename test helper classes to avoid naming conflicts.
Remove 'descriptor' from variable names.
Make DescriptorBuilder public.
Adapt other classes as well as tests.
---
 .../collector/bridgedescs/DescriptorBuilder.java   |  57 +
 .../SanitizedBridgeDescriptorBuilder.java  |  54 
 .../bridgedescs/SanitizedBridgesWriter.java|   4 +-
 ...er.java => ExtraInfoTestDescriptorBuilder.java} |   4 +-
 ...ava => NetworkStatusTestDescriptorBuilder.java} |   4 +-
 .../bridgedescs/SanitizedBridgesWriterTest.java| 142 +++--
 ...ilder.java => ServerTestDescriptorBuilder.java} |   4 +-
 ...TarballBuilder.java => TarballTestBuilder.java} |  12 +-
 ...ptorBuilder.java => TestDescriptorBuilder.java} |   2 +-
 9 files changed, 145 insertions(+), 138 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java 
b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
new file mode 100644
index 0000000..12a8956
--- /dev/null
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -0,0 +1,57 @@
+/* Copyright 2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.bridgedescs;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Builder for descriptors.
+ *
+ * This builder class can be used while parsing and processing an original
+ * descriptor. It accepts {@code String}s, {@code DescriptorBuilder}s and
+ * {@code StringBuilder}s. The latter two as placeholders for parts that can
+ * only be processed after finishing the parsing step. */
+class DescriptorBuilder {
+
+  private List<StringBuilder> parts;
+
+  private StringBuilder lastPart;
+
+  public DescriptorBuilder() {
+this.parts = new ArrayList<>();
+this.lastPart = new StringBuilder();
+this.parts.add(this.lastPart);
+  }
+
+  public DescriptorBuilder append(String sanitizedString) {
+this.lastPart.append(sanitizedString);
+return this;
+  }
+
+  public DescriptorBuilder append(StringBuilder placeholder) {
+this.parts.add(placeholder);
+this.lastPart = new StringBuilder();
+this.parts.add(this.lastPart);
+return this;
+  }
+
+  public DescriptorBuilder space() {
+this.lastPart.append(' ');
+return this;
+  }
+
+  public DescriptorBuilder newLine() {
+this.lastPart.append('\n');
+return this;
+  }
+
+  @Override
+  public String toString() {
+StringBuilder full = new StringBuilder();
+for (StringBuilder part : this.parts) {
+  full.append(part);
+}
+return full.toString();
+  }
+}
diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
deleted file mode 100644
index 174a5ae..000
--- 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package org.torproject.collector.bridgedescs;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/** Builder for sanitized bridge descriptors.
- *
- * This builder class can be used while parsing and sanitizing an original
- * bridge descriptor. It accepts already sanitized {@code String}s and
- * {@code StringBuilder}s as placeholders for parts that can only be sanitized
- * after finishing the parsing step. */
-class SanitizedBridgeDescriptorBuilder {
-
-  private List descriptorParts;
-
-  private StringBuilder lastDescriptorPart;
-
-  SanitizedBridgeDescriptorBuilder() {
-this.descriptorParts = new ArrayList<>();
-this.lastDescriptorPart = new StringBuilder();
-this.descriptorParts.add(this.lastDescriptorPart);
-  }
-
-  SanitizedBridgeDescriptorBuilder append(String sanitizedString) {
-this.lastDescriptorPart.append(sanitizedString);
-return this;
-  }
-
-  SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) {
-this.descriptorParts.add(placeholder);
-this.lastDescriptorPart = new StringBuilder();
-this.descriptorParts.add(this.lastDescriptorPart);
-return this;
-  }
-
-  SanitizedBridgeDescriptorBuilder space() {
-this.lastDescriptorPart.append(' ');
-return this;
-  }
-
-  SanitizedBridgeDescriptorBuilder newLine() {
-this.lastDescriptorPart.append('\n');
-return this;
-  }
-
-  @Override
-  public String toString() {
-StringBuilder fullDescriptor = new StringBuilder();
-for (StringBuilder descriptorPart : this.descriptorParts) {
-  

[tor-commits] [collector/release] Use DescriptorBuilder more often.

2018-02-26 Thread karsten
commit 4e61bb792bc4cd4db9df6eb49ab88890b34ff489
Author: iwakeh 
Date:   Fri Oct 27 17:35:17 2017 +0000

Use DescriptorBuilder more often.

Add convenience constructor accepting the first string as argument.
---
 .../torproject/collector/bridgedescs/DescriptorBuilder.java  |  5 +
 .../collector/bridgedescs/SanitizedBridgesWriter.java| 12 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java 
b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index 9c47b5e..f530368 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -34,6 +34,11 @@ class DescriptorBuilder {
 this.parts.add(this.lastPart);
   }
 
+  public DescriptorBuilder(String firstString) {
+this();
+this.append(firstString);
+  }
+
   private void throwExceptionIfFinalized() {
 if (this.finalized) {
   throw new IllegalStateException("This DescriptorBuilder is finalized and"
diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index 1d264a5..b4cd49e 100644
--- 
a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ 
b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -461,7 +461,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 }
 
 /* Parse the given network status line by line. */
-StringBuilder header = new StringBuilder();
+DescriptorBuilder header = new DescriptorBuilder();
 boolean includesFingerprintLine = false;
 SortedMap scrubbedLines = new TreeMap<>();
 try {
@@ -483,7 +483,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 
 /* Additional header lines don't have to be cleaned up. */
 } else if (line.startsWith("flag-thresholds ")) {
-  header.append(line + "\n");
+  header.append(line).newLine();
 
 /* The authority fingerprint in the "fingerprint" line can go in
  * unscrubbed. */
@@ -494,7 +494,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 + "\"fingerprint\" line (\"" + line + "\").");
 return;
   }
-  header.append(line + "\n");
+  header.append(line).newLine();
   includesFingerprintLine = true;
 
 /* r lines contain sensitive information that needs to be removed
@@ -593,7 +593,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 scrubbed = new StringBuilder();
   }
   if (!includesFingerprintLine) {
-header.append("fingerprint 
").append(authorityFingerprint).append("\n");
+header.append("fingerprint ").append(authorityFingerprint).newLine();
   }
 
   /* Check if we can tell from the descriptor publication times
@@ -879,13 +879,13 @@ public class SanitizedBridgesWriter extends CollecTorMain 
{
 /* Replace node fingerprints in the family line with their hashes
  * and leave nicknames unchanged. */
 } else if (line.startsWith("family ")) {
-  StringBuilder familyLine = new StringBuilder("family");
+  DescriptorBuilder familyLine = new DescriptorBuilder("family");
   for (String s : line.substring(7).split(" ")) {
 if (s.startsWith("$")) {
   familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex(
   s.substring(1).toCharArray())).toUpperCase());
 } else {
-  familyLine.append(" ").append(s);
+  familyLine.space().append(s);
 }
   }
   scrubbed.append(familyLine.toString()).newLine();



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Reduce memory footprint and wall time.

2018-02-26 Thread karsten
commit 8557bf6255e6e3745088033e8e7bad7801421686
Author: iwakeh 
Date:   Tue Feb 20 16:30:09 2018 +

Reduce memory footprint and wall time.

Adapt to latest changes of metrics-lib (task-25329) and make use of the high
redundancy of logs (e.g. a 3G file might only contain 350 different lines).
This avoids OOM and array out of bounds exceptions for large files (>2G) and
gives a speed-up of roughly 50%. (The earlier 66min are down to 34min for
meronense files plus two larger files.)

There is a BATCH constant, which could be tuned for processing speed. It is
logged for each webstats module run.  Currently, it is set to 100k.  This
was more or less arbitrarily chosen and used for all the tests.  A test run
using 500k didn't show significant differences.
---
 .../persist/WebServerAccessLogPersistence.java |  8 ---
 .../collector/webstats/SanitizeWeblogs.java| 61 ++
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
 
b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
index 792d3a9..dab4112 100644
--- 
a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
+++ 
b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
@@ -5,7 +5,6 @@ package org.torproject.collector.persist;
 
 import org.torproject.descriptor.WebServerAccessLog;
 import org.torproject.descriptor.internal.FileType;
-import org.torproject.descriptor.log.InternalLogDescriptor;
 import org.torproject.descriptor.log.InternalWebServerAccessLog;
 
 import org.slf4j.Logger;
@@ -30,13 +29,6 @@ public class WebServerAccessLogPersistence
   /** Prepare storing the given descriptor. */
   public WebServerAccessLogPersistence(WebServerAccessLog desc) {
 super(desc, new byte[0]);
-byte[] compressedBytes = null;
-try { // The descriptor bytes have to be stored compressed.
-  compressedBytes = COMPRESSION.compress(desc.getRawDescriptorBytes());
-  ((InternalLogDescriptor)desc).setRawDescriptorBytes(compressedBytes);
-} catch (Exception ex) {
-  log.warn("Cannot compress ’{}’.  Storing uncompressed.", ex);
-}
 calculatePaths();
   }
 
diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 7601898..1f2e922 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -3,6 +3,7 @@
 
 package org.torproject.collector.webstats;
 
+import static java.util.stream.Collectors.counting;
 import static java.util.stream.Collectors.groupingByConcurrent;
 import static java.util.stream.Collectors.toList;
 
@@ -17,6 +18,7 @@ import 
org.torproject.collector.persist.WebServerAccessLogPersistence;
 import org.torproject.descriptor.DescriptorParseException;
 import org.torproject.descriptor.Method;
 import org.torproject.descriptor.WebServerAccessLog;
+import org.torproject.descriptor.internal.FileType;
 import org.torproject.descriptor.log.InternalLogDescriptor;
 import org.torproject.descriptor.log.InternalWebServerAccessLog;
 import org.torproject.descriptor.log.WebServerAccessLogImpl;
@@ -26,8 +28,9 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.InputStreamReader;
+import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.LocalDate;
@@ -40,6 +43,7 @@ import java.util.SortedSet;
 import java.util.StringJoiner;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 /**
@@ -87,6 +91,7 @@ public class SanitizeWeblogs extends CollecTorMain {
   Set sources = this.config.getSourceTypeSet(
   Key.WebstatsSources);
   if (sources.contains(SourceType.Local)) {
+log.info("Processing logs using batch value {}.", BATCH);
 findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins));
 PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath));
   }
@@ -126,24 +131,60 @@ public class SanitizeWeblogs extends CollecTorMain {
 String name = new StringJoiner(InternalLogDescriptor.SEP)
 .add(virtualHost).add(physicalHost)
 .add(InternalWebServerAccessLog.MARKER)
-.add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString();
+.add(date.format(DateTimeFormatter.BASIC_ISO_DATE))
+.toString() + "." + FileType.XZ.name().toLowerCase();
 log.debug("Sanitizing {}.", name);
-List retainedLines = lines
+Map retainedLines = new TreeMap<>(lines
 .stream().parallel().map((line) -> 

[tor-commits] [collector/release] Make DescriptorBuilder also accept DescriptorBuilders.

2018-02-26 Thread karsten
commit fbfa16c05b3f74acd60ccdf780568e7e1b0b9e1b
Author: iwakeh 
Date:   Fri Oct 27 17:35:14 2017 +

Make DescriptorBuilder also accept DescriptorBuilders.

This might facilitate easier processing of descriptors.
---
 .../torproject/collector/bridgedescs/DescriptorBuilder.java | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java 
b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index 12a8956..cc57662 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -14,7 +14,7 @@ import java.util.List;
  * only be processed after finishing the parsing step. */
 class DescriptorBuilder {
 
-  private List parts;
+  private List parts;
 
   private StringBuilder lastPart;
 
@@ -36,6 +36,13 @@ class DescriptorBuilder {
 return this;
   }
 
+  public DescriptorBuilder append(DescriptorBuilder placeholder) {
+this.parts.add(placeholder);
+this.lastPart = new StringBuilder();
+this.parts.add(this.lastPart);
+return this;
+  }
+
   public DescriptorBuilder space() {
 this.lastPart.append(' ');
 return this;
@@ -49,8 +56,8 @@ class DescriptorBuilder {
   @Override
   public String toString() {
 StringBuilder full = new StringBuilder();
-for (StringBuilder part : this.parts) {
-  full.append(part);
+for (Object part : this.parts) {
+  full.append(part.toString());
 }
 return full.toString();
   }



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Add a finalized state to DescriptorBuilder.

2018-02-26 Thread karsten
commit afe07d8efd4dc94b9dfb9b5896002286ba71dc6d
Author: iwakeh 
Date:   Fri Oct 27 17:35:16 2017 +

Add a finalized state to DescriptorBuilder.

To avoid possible inconsistencies, DescriptorBuilder is finalized after the
first call to 'toString' and cannot be altered anymore.  Any attempt to add
more leads to an IllegalStateException.
---
 .../collector/bridgedescs/DescriptorBuilder.java   | 34 --
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java 
b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index 4ca9dd1..9c47b5e 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -12,25 +12,43 @@ import java.util.stream.Collectors;
  * This builder class can be used while parsing and processing an original
  * descriptor. It accepts {@code String}s, {@code DescriptorBuilder}s and
  * {@code StringBuilder}s. The latter two as placeholders for parts that can
- * only be processed after finishing the parsing step. */
+ * only be processed after finishing the parsing step.
+ *
+ * Calling {@code toString} finalizes the builder and any subsequent
+ * method calls other than {@code toString} will result in an 
+ * {@code IllegalStateException}.
+ */
 class DescriptorBuilder {
 
   private List parts;
 
   private StringBuilder lastPart;
 
+  private boolean finalized = false;
+
+  private String value;
+
   public DescriptorBuilder() {
 this.parts = new ArrayList<>();
 this.lastPart = new StringBuilder();
 this.parts.add(this.lastPart);
   }
 
+  private void throwExceptionIfFinalized() {
+if (this.finalized) {
+  throw new IllegalStateException("This DescriptorBuilder is finalized and"
+  + " calling anything other than 'toString' is illegal.");
+}
+  }
+
   public DescriptorBuilder append(String sanitizedString) {
+this.throwExceptionIfFinalized();
 this.lastPart.append(sanitizedString);
 return this;
   }
 
   public DescriptorBuilder append(StringBuilder placeholder) {
+this.throwExceptionIfFinalized();
 this.parts.add(placeholder);
 this.lastPart = new StringBuilder();
 this.parts.add(this.lastPart);
@@ -38,6 +56,7 @@ class DescriptorBuilder {
   }
 
   public DescriptorBuilder append(DescriptorBuilder placeholder) {
+this.throwExceptionIfFinalized();
 this.parts.add(placeholder);
 this.lastPart = new StringBuilder();
 this.parts.add(this.lastPart);
@@ -45,18 +64,27 @@ class DescriptorBuilder {
   }
 
   public DescriptorBuilder space() {
+this.throwExceptionIfFinalized();
 this.lastPart.append(' ');
 return this;
   }
 
   public DescriptorBuilder newLine() {
+this.throwExceptionIfFinalized();
 this.lastPart.append('\n');
 return this;
   }
 
   @Override
   public String toString() {
-return this.parts.stream().map(part -> part.toString())
-.collect(Collectors.joining(""));
+if (!this.finalized) {
+  this.finalized = true;
+  this.value = this.parts.stream().map(part -> part.toString())
+  .collect(Collectors.joining(""));
+  this.parts.clear(); // not needed anymore
+  this.lastPart = null;
+}
+return value;
   }
+
 }



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Prepare for 1.5.0 release.

2018-02-26 Thread karsten
commit ddfa7bad243dc8e0a3105b14418794c87786e88f
Author: Karsten Loesing 
Date:   Mon Feb 26 14:23:05 2018 +0100

Prepare for 1.5.0 release.
---
 CERT | 20 ++--
 CHANGELOG.md |  2 +-
 build.xml|  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/CERT b/CERT
index d94c0c3..43be56e 100644
--- a/CERT
+++ b/CERT
@@ -1,8 +1,8 @@
 -BEGIN CERTIFICATE-
-MIIDaTCCAlGgAwIBAgIEZTniETANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
+MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
 UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU
-b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw
-ODE3MTg1MDQ0WhcNMTcxMTE1MTg1MDQ0WjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
+b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw
+MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
 CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj
 dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB
 AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU
@@ -11,11 +11,11 @@ 
Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE
 MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD
 oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX
 lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud
-DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAbsAc
-gwl5KJH3pVKw4b+ACCOMgW+27MisCFbT1Izq2Wx+JcLMt3N//MoIpYOZWhsIeazW
-/NE0fNbkLi0IYA0F1nUC9pHl44Hd8Gjfqa/YQUi9ALtgsY7l6W0sceW8WnZ8bu8J
-DfrqnmB0bD2xc9ZjOn58al8dVjVWs95M87D9WCRU6LiaKFj5c45wciABQsTmC0qD
-pyHYOaSGtXxXKDw5pAntdtHkCbowV5tDi/QQ8Tg7i5O7xwSh71Q7TZiNFMpLomBL
-QllHfTZryFmoHyGn5MfngBUVCVHig5nXmk0dUMGuLiK4789dkgiPRz0vpB5Yf8Yy
-CCE2jB6VBi2g5fMx0w==
+DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU
+qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x
+RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq
+MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG
+qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW
+2DiJiA8BLxZToe2XDA==
 -END CERTIFICATE-
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a0b5d1f..eb14839 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# Changes in version 1.5.0 - 2018-01-31
+# Changes in version 1.5.0 - 2018-02-26
 
  * Major changes
- Update to metrics-lib 2.2.0.
diff --git a/build.xml b/build.xml
index 48f6e33..ff8302b 100644
--- a/build.xml
+++ b/build.xml
@@ -8,7 +8,7 @@
 
   
   
-  
+  
   
   
   

___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Circumvent Collection (integer) size limit.

2018-02-26 Thread karsten
commit d05b4e4aee3bc15c3e4d5bac660dfcee5bc26279
Author: iwakeh 
Date:   Tue Feb 20 16:30:14 2018 +

Circumvent Collection (integer) size limit.

Clean log lines immediately when they are read and also make use of the
sanitized log's high redundancy immediately, i.e., continue with maps of
type Map<LocalDate, Map<String, Long>>.

Rename method(s) to reflect what they do.
---
 .../collector/webstats/SanitizeWeblogs.java| 89 --
 1 file changed, 65 insertions(+), 24 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 1f2e922..5a270dd 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -4,8 +4,10 @@
 package org.torproject.collector.webstats;
 
 import static java.util.stream.Collectors.counting;
+import static java.util.stream.Collectors.groupingBy;
 import static java.util.stream.Collectors.groupingByConcurrent;
-import static java.util.stream.Collectors.toList;
+import static java.util.stream.Collectors.reducing;
+import static java.util.stream.Collectors.summingLong;
 
 import org.torproject.collector.conf.Configuration;
 import org.torproject.collector.conf.ConfigurationException;
@@ -35,6 +37,8 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -111,35 +115,36 @@ public class SanitizeWeblogs extends CollecTorMain {
   : virtualEntry.getValue().entrySet()) {
 String physicalHost = physicalEntry.getKey();
 log.info("Processing logs for {} on {}.", virtualHost, physicalHost);
-Map linesByDate
+Map> linesByDate
 = physicalEntry.getValue().values().stream().parallel()
-.flatMap((LogMetadata metadata) -> lineStream(metadata)
-   .filter((line) -> line.isValid())).parallel()
-.collect(groupingByConcurrent(WebServerAccessLogLine::getDate));
+.flatMap(metadata -> sanitzedLineStream(metadata).entrySet()
+.stream())
+.collect(groupingBy(Map.Entry::getKey,
+  reducing(Collections.emptyMap(), Map.Entry::getValue,
+(e1, e2) -> Stream.concat(e1.entrySet().stream(), e2.entrySet()
+  .stream())
+  .collect(groupingByConcurrent(Map.Entry::getKey,
+  summingLong(Map.Entry::getValue));
 LocalDate[] interval = determineInterval(linesByDate.keySet());
 linesByDate.entrySet().stream()
 .filter((entry) -> entry.getKey().isAfter(interval[0])
   && entry.getKey().isBefore(interval[1])).parallel()
-.forEach((entry) -> storeSanitized(virtualHost, physicalHost,
+.forEach((entry) -> storeSortedAndForget(virtualHost, physicalHost,
   entry.getKey(), entry.getValue()));
   }
 }
   }
 
-  private void storeSanitized(String virtualHost, String physicalHost,
-  LocalDate date, List lines) {
+  private void storeSortedAndForget(String virtualHost, String physicalHost,
+  LocalDate date, Map lineCounts) {
 String name = new StringJoiner(InternalLogDescriptor.SEP)
 .add(virtualHost).add(physicalHost)
 .add(InternalWebServerAccessLog.MARKER)
 .add(date.format(DateTimeFormatter.BASIC_ISO_DATE))
 .toString() + "." + FileType.XZ.name().toLowerCase();
-log.debug("Sanitizing {}.", name);
-Map retainedLines = new TreeMap<>(lines
-.stream().parallel().map((line) -> sanitize(line, date))
-.filter((line) -> line.isPresent())
-.map((line) -> line.get())
-.collect(groupingByConcurrent(line -> line, counting(;
-lines.clear(); // not needed anymore
+log.debug("Storing {}.", name);
+Map retainedLines = new TreeMap<>(lineCounts);
+lineCounts.clear(); // not needed anymore
 try {
   WebServerAccessLogPersistence walp
   = new WebServerAccessLogPersistence(
@@ -187,8 +192,8 @@ public class SanitizeWeblogs extends CollecTorMain {
 .collect(Collectors.joining("\n", "", "\n")).getBytes();
   }
 
-  static Optional sanitize(WebServerAccessLogLine logLine,
-  LocalDate date) {
+  static Optional
+  sanitize(WebServerAccessLogLine logLine) {
 if (!logLine.isValid()
 || !(Method.GET == logLine.getMethod()
  || Method.HEAD == logLine.getMethod())
@@ -203,10 +208,13 @@ public class SanitizeWeblogs extends CollecTorMain {
 if (queryStart > 0) {
   logLine.setRequest(logLine.getRequest().substring(0, queryStart));
 }
-  

[tor-commits] [collector/release] Add webstats module with sync and local import functionality.

2018-02-26 Thread karsten
commit 97e577ae73ec631ac5d7448cb9f525594baa0c8a
Author: iwakeh 
Date:   Mon Oct 9 12:23:53 2017 +

Add webstats module with sync and local import functionality.

Implements task-22428.
---
 CHANGELOG.md   |   6 +-
 build.xml  |   2 +-
 src/main/java/org/torproject/collector/Main.java   |   2 +
 .../torproject/collector/conf/Configuration.java   |   3 +-
 .../java/org/torproject/collector/conf/Key.java|   9 +-
 .../collector/persist/DescriptorPersistence.java   |   2 +
 .../persist/WebServerAccessLogPersistence.java |  73 
 .../torproject/collector/sync/SyncPersistence.java |   7 +
 .../torproject/collector/webstats/LogFileMap.java  | 115 
 .../torproject/collector/webstats/LogMetadata.java |  87 +
 .../collector/webstats/SanitizeWeblogs.java| 198 +
 src/main/resources/collector.properties|  20 ++-
 .../collector/conf/ConfigurationTest.java  |   2 +-
 .../collector/cron/CollecTorMainTest.java  |   1 +
 .../collector/sync/SyncPersistenceTest.java|  68 +++
 .../collector/webstats/LogFileMapTest.java |  33 
 .../collector/webstats/LogMetadataTest.java|  82 +
 ...eotrichon.torproject.org_access.log_20151007.xz | Bin 0 -> 4056 bytes
 ...meronense.torproject.org_access.log_20170531.gz | Bin 0 -> 388 bytes
 19 files changed, 671 insertions(+), 39 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f4cd21..a0b5d1f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
-# Changes in version 1.?.? - 201?-??-??
+# Changes in version 1.5.0 - 2018-01-31
+
+ * Major changes
+   - Update to metrics-lib 2.2.0.
+   - Add new module for processing and sanitizing Tor web server logs.
 
  * Minor changes
- Exclude lastModifiedMillis in index.json.
diff --git a/build.xml b/build.xml
index f004f29..48f6e33 100644
--- a/build.xml
+++ b/build.xml
@@ -11,7 +11,7 @@
   
   
   
-  
+  
   
 
   
diff --git a/src/main/java/org/torproject/collector/Main.java 
b/src/main/java/org/torproject/collector/Main.java
index 50cc8be..70cdbfa 100644
--- a/src/main/java/org/torproject/collector/Main.java
+++ b/src/main/java/org/torproject/collector/Main.java
@@ -14,6 +14,7 @@ import org.torproject.collector.exitlists.ExitListDownloader;
 import org.torproject.collector.index.CreateIndexJson;
 import org.torproject.collector.onionperf.OnionPerfDownloader;
 import org.torproject.collector.relaydescs.ArchiveWriter;
+import org.torproject.collector.webstats.SanitizeWeblogs;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -51,6 +52,7 @@ public class Main {
 collecTorMains.put(Key.UpdateindexActivated, CreateIndexJson.class);
 collecTorMains.put(Key.RelaydescsActivated, ArchiveWriter.class);
 collecTorMains.put(Key.OnionPerfActivated, OnionPerfDownloader.class);
+collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class);
   }
 
   private static Configuration conf = new Configuration();
diff --git a/src/main/java/org/torproject/collector/conf/Configuration.java 
b/src/main/java/org/torproject/collector/conf/Configuration.java
index 57f9731..72bd5fc 100644
--- a/src/main/java/org/torproject/collector/conf/Configuration.java
+++ b/src/main/java/org/torproject/collector/conf/Configuration.java
@@ -92,7 +92,8 @@ public class Configuration extends Observable implements 
Cloneable {
 || this.getBool(Key.BridgedescsActivated)
 || this.getBool(Key.ExitlistsActivated)
 || this.getBool(Key.UpdateindexActivated)
-|| this.getBool(Key.OnionPerfActivated))) {
+|| this.getBool(Key.OnionPerfActivated)
+|| this.getBool(Key.WebstatsActivated))) {
   throw new ConfigurationException("Nothing is activated!\n"
   + "Please edit collector.properties. Exiting.");
 }
diff --git a/src/main/java/org/torproject/collector/conf/Key.java 
b/src/main/java/org/torproject/collector/conf/Key.java
index e0a20a7..6454009 100644
--- a/src/main/java/org/torproject/collector/conf/Key.java
+++ b/src/main/java/org/torproject/collector/conf/Key.java
@@ -28,6 +28,7 @@ public enum Key {
   BridgeSources(SourceType[].class),
   ExitlistSources(SourceType[].class),
   OnionPerfSources(SourceType[].class),
+  WebstatsSources(SourceType[].class),
   RelayCacheOrigins(String[].class),
   RelayLocalOrigins(Path.class),
   RelaySyncOrigins(URL[].class),
@@ -35,6 +36,8 @@ public enum Key {
   BridgeLocalOrigins(Path.class),
   ExitlistSyncOrigins(URL[].class),
   OnionPerfSyncOrigins(URL[].class),
+  WebstatsSyncOrigins(URL[].class),
+  WebstatsLocalOrigins(Path.class),
   BridgedescsActivated(Boolean.class),
   BridgedescsOffsetMinutes(Integer.class),
   BridgedescsPeriodMinutes(Integer.class),
@@ -58,7 +61,11 @@ public enum Key {
   KeepDirectoryArchiveImportHistory(Boolean.class),
   ReplaceIpAddressesWithHashes(Boolean.class),
   

[tor-commits] [collector/release] Exclude lastModifiedMillis in index.json.

2018-02-26 Thread karsten
commit b23232bd44c82defee92cbe1d697cafb7862205a
Author: Karsten Loesing 
Date:   Thu Dec 14 10:13:11 2017 +0100

Exclude lastModifiedMillis in index.json.

Fixes #24621.
---
 CHANGELOG.md  | 6 ++
 src/main/java/org/torproject/collector/index/CreateIndexJson.java | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c924f5e..2f4cd21 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# Changes in version 1.?.? - 201?-??-??
+
+ * Minor changes
+   - Exclude lastModifiedMillis in index.json.
+
+
 # Changes in version 1.4.1 - 2017-10-26
 
  * Medium changes
diff --git a/src/main/java/org/torproject/collector/index/CreateIndexJson.java 
b/src/main/java/org/torproject/collector/index/CreateIndexJson.java
index 5c4daf9..c4399b8 100644
--- a/src/main/java/org/torproject/collector/index/CreateIndexJson.java
+++ b/src/main/java/org/torproject/collector/index/CreateIndexJson.java
@@ -163,7 +163,8 @@ public class CreateIndexJson extends CollecTorMain {
 
   private void writeIndex(IndexNode indexNode) throws Exception {
 indexJsonFile.getParentFile().mkdirs();
-Gson gson = new GsonBuilder().create();
+Gson gson = new GsonBuilder().excludeFieldsWithoutExposeAnnotation()
+.create();
 String indexNodeString = gson.toJson(indexNode);
 for (String filename : new String[] {indexJsonFile.toString(),
 indexJsonFile + ".gz", indexJsonFile + ".xz", indexJsonFile + ".bz2"}) 
{



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Avoid repeated validation of clean and validated log lines.

2018-02-26 Thread karsten
commit 06d1a81d4cfe8ce89caa6cf49124f6700b33f522
Author: iwakeh 
Date:   Wed Jan 31 13:31:28 2018 +

Avoid repeated validation of clean and validated log lines.
---
 src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 635457c..53ad118 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -136,7 +136,7 @@ public class SanitizeWeblogs extends CollecTorMain {
 try {
   WebServerAccessLogPersistence walp
   = new WebServerAccessLogPersistence(
-  new WebServerAccessLogImpl(retainedLines, name));
+  new WebServerAccessLogImpl(retainedLines, name, false));
   log.debug("Storing {}.", name);
   walp.storeOut(this.outputPathName);
   walp.storeRecent(this.recentPathName);



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Bump version to 1.4.1-dev.

2018-02-26 Thread karsten
commit 60dfface9783b5715717dd10fbd90c4dc93e4321
Author: Karsten Loesing 
Date:   Thu Oct 26 10:16:35 2017 +0200

Bump version to 1.4.1-dev.
---
 build.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.xml b/build.xml
index fb87709..f004f29 100644
--- a/build.xml
+++ b/build.xml
@@ -8,7 +8,7 @@
 
   
   
-  
+  
   
   
   



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Update metrics-base.

2018-02-26 Thread karsten
commit ee7f1353a22b3d19857722b5b68604e2517012c5
Author: Karsten Loesing 
Date:   Fri Dec 15 17:01:27 2017 +0100

Update metrics-base.
---
 src/build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/build b/src/build
index 4b34756..23c6e0b 16
--- a/src/build
+++ b/src/build
@@ -1 +1 @@
-Subproject commit 4b34756ddd71ccaf0fc30e5f5bf0a813a297d4a3
+Subproject commit 23c6e0be5fab9463f137615053ef412e4da2315e



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Parallelize two more processing steps.

2018-02-26 Thread karsten
commit 15db1e2a793ac7e67a1e7aa87c2ea857825a98a2
Author: iwakeh 
Date:   Wed Jan 31 13:31:25 2018 +

Parallelize two more processing steps.
---
 src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index a3d2a7e..4496861 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -113,7 +113,7 @@ public class SanitizeWeblogs extends CollecTorMain {
 LocalDate[] interval = determineInterval(linesByDate.keySet());
 linesByDate.entrySet().stream()
 .filter((entry) -> entry.getKey().isAfter(interval[0])
-  && entry.getKey().isBefore(interval[1]))
+  && entry.getKey().isBefore(interval[1])).parallel()
 .forEach((entry) -> storeSanitized(virtualHost, physicalHost,
   entry.getKey(), entry.getValue()));
   }
@@ -128,7 +128,7 @@ public class SanitizeWeblogs extends CollecTorMain {
 .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString();
 log.debug("Sanitizing {}.", name);
 List retainedLines = lines
-.stream().map((line) -> sanitize(line, date))
+.stream().parallel().map((line) -> sanitize(line, date))
 .filter((line) -> line.isPresent()).map((line) -> line.get())
 .collect(Collectors.toList());
 retainedLines.sort(null);



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Update copyright to 2018.

2018-02-26 Thread karsten
commit 7f01208aedf150822f589375bf2cf45b3a6af5c0
Author: Karsten Loesing 
Date:   Tue Jan 9 10:23:10 2018 +0100

Update copyright to 2018.
---
 src/main/java/org/torproject/collector/Main.java| 2 +-
 .../org/torproject/collector/bridgedescs/BridgeDescriptorParser.java| 2 +-
 .../java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java | 2 +-
 .../org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java| 2 +-
 src/main/java/org/torproject/collector/conf/Annotation.java | 2 +-
 src/main/java/org/torproject/collector/conf/Configuration.java  | 2 +-
 src/main/java/org/torproject/collector/conf/ConfigurationException.java | 2 +-
 src/main/java/org/torproject/collector/conf/Key.java| 2 +-
 src/main/java/org/torproject/collector/conf/SourceType.java | 2 +-
 src/main/java/org/torproject/collector/cron/CollecTorMain.java  | 2 +-
 src/main/java/org/torproject/collector/cron/Scheduler.java  | 2 +-
 src/main/java/org/torproject/collector/cron/ShutdownHook.java   | 2 +-
 .../java/org/torproject/collector/exitlists/ExitListDownloader.java | 2 +-
 src/main/java/org/torproject/collector/index/CreateIndexJson.java   | 2 +-
 .../java/org/torproject/collector/onionperf/OnionPerfDownloader.java| 2 +-
 .../org/torproject/collector/persist/BridgeExtraInfoPersistence.java| 2 +-
 .../torproject/collector/persist/BridgeServerDescriptorPersistence.java | 2 +-
 .../java/org/torproject/collector/persist/ConsensusPersistence.java | 2 +-
 .../java/org/torproject/collector/persist/DescriptorPersistence.java| 2 +-
 src/main/java/org/torproject/collector/persist/ExitlistPersistence.java | 2 +-
 .../java/org/torproject/collector/persist/ExtraInfoPersistence.java | 2 +-
 .../org/torproject/collector/persist/MicroConsensusPersistence.java | 2 +-
 .../java/org/torproject/collector/persist/OnionPerfPersistence.java | 2 +-
 src/main/java/org/torproject/collector/persist/PersistenceUtils.java| 2 +-
 .../org/torproject/collector/persist/ServerDescriptorPersistence.java   | 2 +-
 src/main/java/org/torproject/collector/persist/StatusPersistence.java   | 2 +-
 src/main/java/org/torproject/collector/persist/VotePersistence.java | 2 +-
 src/main/java/org/torproject/collector/persist/package-info.java| 2 +-
 src/main/java/org/torproject/collector/relaydescs/ArchiveReader.java| 2 +-
 src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java| 2 +-
 .../torproject/collector/relaydescs/CachedRelayDescriptorReader.java| 2 +-
 src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java | 2 +-
 .../org/torproject/collector/relaydescs/RelayDescriptorDownloader.java  | 2 +-
 .../java/org/torproject/collector/relaydescs/RelayDescriptorParser.java | 2 +-
 src/main/java/org/torproject/collector/sync/Criterium.java  | 2 +-
 src/main/java/org/torproject/collector/sync/ProcessCriterium.java   | 2 +-
 src/main/java/org/torproject/collector/sync/SyncManager.java| 2 +-
 src/main/java/org/torproject/collector/sync/SyncPersistence.java| 2 +-
 src/main/java/org/torproject/collector/sync/package-info.java   | 2 +-
 src/main/resources/bootstrap-development.sh | 2 +-
 src/main/resources/create-tarballs.sh   | 2 +-
 src/test/java/org/torproject/collector/MainTest.java| 2 +-
 .../torproject/collector/bridgedescs/BridgeDescriptorParserTest.java| 2 +-
 .../java/org/torproject/collector/bridgedescs/DescriptorBuilder.java| 2 +-
 .../torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java| 2 +-
 .../java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java | 2 +-
 .../torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java| 2 +-
 .../org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java   | 2 +-
 src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java  | 2 +-
 src/test/java/org/torproject/collector/conf/ConfigurationTest.java  | 2 +-
 src/test/java/org/torproject/collector/cron/Broken.java | 2 +-
 src/test/java/org/torproject/collector/cron/CollecTorMainTest.java  | 2 +-
 src/test/java/org/torproject/collector/cron/SchedulerTest.java  | 2 +-
 src/test/java/org/torproject/collector/persist/PersistUtilsTest.java| 2 +-
 .../java/org/torproject/collector/relaydescs/ReferenceCheckerTest.java  | 2 +-
 src/test/java/org/torproject/collector/sync/FileCollector.java  | 2 +-
 src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java| 2 +-
 57 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/src/main/java/org/torproject/collector/Main.java 
b/src/main/java/org/torproject/collector/Main.java
index 1377fec..50cc8be 100644
--- a/src/main/java/org/torproject/collector/Main.java
+++ 

[tor-commits] [collector/release] Use enum Method from metrics-lib.

2018-02-26 Thread karsten
commit 2a0aa8c7f8c956ba15c5780bc078d5330322fcd5
Author: iwakeh 
Date:   Wed Jan 31 13:31:24 2018 +

Use enum Method from metrics-lib.
---
 src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 88d62fa..a3d2a7e 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -15,6 +15,7 @@ import org.torproject.descriptor.DescriptorParseException;
 import org.torproject.descriptor.WebServerAccessLog;
 import org.torproject.descriptor.log.InternalLogDescriptor;
 import org.torproject.descriptor.log.InternalWebServerAccessLog;
+import org.torproject.descriptor.log.Method;
 import org.torproject.descriptor.log.WebServerAccessLogImpl;
 import org.torproject.descriptor.log.WebServerAccessLogLine;
 
@@ -146,8 +147,8 @@ public class SanitizeWeblogs extends CollecTorMain {
   static Optional sanitize(WebServerAccessLogLine logLine,
   LocalDate date) {
 if (!logLine.isValid()
-|| !("GET".equals(logLine.getMethod())
- || "HEAD".equals(logLine.getMethod()))
+|| !(Method.GET == logLine.getMethod()
+ || Method.HEAD == logLine.getMethod())
 || !logLine.getProtocol().startsWith("HTTP")
 || 400 == logLine.getResponse() || 404 == logLine.getResponse()) {
   return Optional.empty();



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/release] Optimize parallel processing and use static imports for readability.

2018-02-26 Thread karsten
commit bd948070e03ff71503fdba84cff6bc61c9fbe452
Author: iwakeh 
Date:   Wed Jan 31 13:31:26 2018 +

Optimize parallel processing and use static imports for readability.
---
 .../torproject/collector/webstats/SanitizeWeblogs.java   | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 4496861..635457c 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -3,6 +3,9 @@
 
 package org.torproject.collector.webstats;
 
+import static java.util.stream.Collectors.groupingByConcurrent;
+import static java.util.stream.Collectors.toList;
+
 import org.torproject.collector.conf.Configuration;
 import org.torproject.collector.conf.ConfigurationException;
 import org.torproject.collector.conf.Key;
@@ -37,7 +40,6 @@ import java.util.SortedSet;
 import java.util.StringJoiner;
 import java.util.TreeMap;
 import java.util.TreeSet;
-import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 /**
@@ -106,10 +108,9 @@ public class SanitizeWeblogs extends CollecTorMain {
 log.info("Processing logs for {} on {}.", virtualHost, physicalHost);
 Map linesByDate
 = physicalEntry.getValue().values().stream().parallel()
-.flatMap((LogMetadata metadata)
--> lineStream(metadata).filter((line) -> line.isValid()))
-.collect(Collectors.groupingBy(WebServerAccessLogLine::getDate,
-Collectors.toList()));
+.flatMap((LogMetadata metadata) -> lineStream(metadata)
+   .filter((line) -> line.isValid())).parallel()
+.collect(groupingByConcurrent(WebServerAccessLogLine::getDate));
 LocalDate[] interval = determineInterval(linesByDate.keySet());
 linesByDate.entrySet().stream()
 .filter((entry) -> entry.getKey().isAfter(interval[0])
@@ -130,7 +131,7 @@ public class SanitizeWeblogs extends CollecTorMain {
 List retainedLines = lines
 .stream().parallel().map((line) -> sanitize(line, date))
 .filter((line) -> line.isPresent()).map((line) -> line.get())
-.collect(Collectors.toList());
+.collect(toList());
 retainedLines.sort(null);
 try {
   WebServerAccessLogPersistence walp
@@ -142,6 +143,7 @@ public class SanitizeWeblogs extends CollecTorMain {
 } catch (DescriptorParseException dpe) {
   log.error("Cannot store log desriptor {}.", name, dpe);
 }
+lines.clear();
   }
 
   static Optional sanitize(WebServerAccessLogLine logLine,
@@ -188,7 +190,7 @@ public class SanitizeWeblogs extends CollecTorMain {
  metadata.fileType.decompress(Files.readAllBytes(metadata.path)) {
   return br.lines()
   .map((String line) -> WebServerAccessLogLine.makeLine(line))
-  .collect(Collectors.toList()).stream();
+  .collect(toList()).stream();
 } catch (Exception ex) {
   log.debug("Skipping log-file {}.", metadata.path, ex);
 }



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [metrics-lib/release] Accommodate logs with more than Integer.MAX_VALUE lines.

2018-02-26 Thread karsten
commit 35feb816f81f26bcc9dc035a1aaf496c34a86647
Author: iwakeh 
Date:   Fri Feb 16 09:05:46 2018 +

Accommodate logs with more than Integer.MAX_VALUE lines.

Implements task-23046.
---
 .../org/torproject/descriptor/LogDescriptor.java   | 10 +--
 .../torproject/descriptor/WebServerAccessLog.java  |  6 
 .../descriptor/log/WebServerAccessLogImpl.java | 32 ++
 .../descriptor/log/LogDescriptorTest.java  |  5 +++-
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java 
b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index 826fcda..8dd8460 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
 
 import java.io.InputStream;
 import java.util.List;
+import java.util.stream.Stream;
 
 /**
  * Contains a log file.
@@ -64,11 +65,14 @@ public interface LogDescriptor extends Descriptor {
   public List getUnrecognizedLines();
 
   /**
-   * Returns a list of all parseable log lines.
-   * Might require a lot of memory depending on log size.
+   * Returns a stream of all parseable log lines.
+   * Depending on log size this might not fit into a collection type.
+   *
+   * @since 2.2.0
*/
-  public List logLines() throws DescriptorParseException;
+  public Stream logLines() throws DescriptorParseException;
 
+  /** Base interface for accessing log lines. */
   public interface Line {
 
 /** Returns a log line string. */
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java 
b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b4f1940..5f3ad73 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -62,6 +62,12 @@ public interface WebServerAccessLog extends LogDescriptor {
   @Override
   public List getUnrecognizedLines();
 
+  /**
+   * Facilitates access to all log line fields that don't only contain
+   * default values post sanitization.
+   *
+   * @since 2.2.0
+   */
   public interface Line extends LogDescriptor.Line {
 
 /** Returns the IP address of the requesting host. */
diff --git 
a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java 
b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index e48a262..3666d5d 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -15,10 +15,11 @@ import java.io.File;
 import java.io.InputStreamReader;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Implementation of web server access log descriptors.
@@ -128,15 +129,34 @@ public class WebServerAccessLogImpl extends 
LogDescriptorImpl
 return this.logDate;
   }
 
-  /** Returns a list of all valid log lines. */
+  private static final int LISTLIMIT = Integer.MAX_VALUE / 2;
+
+  /** Returns a stream of all valid log lines. */
   @Override
-  public List logLines()
+  public Stream logLines()
   throws DescriptorParseException {
 try (BufferedReader br = new BufferedReader(new InputStreamReader(
 this.decompressedByteStream( {
-  return br.lines().map(line
-  -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line))
-.filter(line -> line.isValid()).collect(Collectors.toList());
+  List lists = new ArrayList<>();
+  List currentList = new ArrayList<>();
+  lists.add(currentList);
+  String lineStr = br.readLine();
+  int count = 0;
+  while (null != lineStr) {
+WebServerAccessLogLine wsal = WebServerAccessLogLine.makeLine(lineStr);
+if (wsal.isValid()) {
+  currentList.add(wsal);
+  count++;
+}
+if (count >= LISTLIMIT) {
+  currentList = new ArrayList<>();
+  lists.add(currentList);
+  count = 0;
+}
+lineStr = br.readLine();
+  }
+  br.close();
+  return lists.stream().flatMap(list -> list.stream());
 } catch (Exception ex) {
   throw new DescriptorParseException("Cannot retrieve log lines.", ex);
 }
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java 
b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index 67ba638..0ff3e62 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -3,6 +3,8 @@
 
 package org.torproject.descriptor.log;
 
+import static java.util.stream.Collectors.toList;

[tor-commits] [metrics-lib/release] Enable handling of larger (> 2G) log files.

2018-02-26 Thread karsten
commit c01dfbdc4e5d817fb10fb45f79d8223853cdeac0
Author: iwakeh 
Date:   Fri Feb 16 09:05:46 2018 +

Enable handling of larger (> 2G) log files.

As log files can be compressed very efficiently, log descriptor raw bytes
contain the compressed bytes.

Added methods for accessing uncompressed log content, as well as
stream-based methods for decompression and compression in class FileType.
Adapted all tests to the changes.

Implements task-25329.
---
 .../org/torproject/descriptor/LogDescriptor.java   | 26 --
 .../torproject/descriptor/internal/FileType.java   | 18 ++
 .../descriptor/log/LogDescriptorImpl.java  | 25 --
 .../descriptor/log/WebServerAccessLogImpl.java | 12 +++
 .../descriptor/log/LogDescriptorTest.java  | 40 +++---
 .../descriptor/log/WebServerModuleTest.java|  4 ++-
 6 files changed, 82 insertions(+), 43 deletions(-)

diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java 
b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index 6a6bf84..826fcda 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -3,21 +3,34 @@
 
 package org.torproject.descriptor;
 
+import java.io.InputStream;
 import java.util.List;
 
 /**
  * Contains a log file.
  *
  * Unlike other descriptors, logs can get very large and are typically 
stored
- * on disk in compressed form. However, all access to log contents through this
- * interface and its subinterfaces is made available in uncompressed form.
+ * on disk in compressed form. Access to log contents through this
+ * interface and its subinterfaces is made available in compressed and
+ * decompressed form:
+ * 
+ * The raw descriptor bytes are compressed, because logs contain
+ * often redundant information that can achieve high compression rates.
+ * For example, a 500kB compressed log file might be deflated to 3GB.
+ * The uncompressed log contents can be accessed as a stream of bytes.
+ * A list of log lines (decompressed) can be retrieved.
+ * 
+ * 
  *
  * @since 2.2.0
  */
 public interface LogDescriptor extends Descriptor {
 
   /**
-   * Returns the decompressed raw descriptor bytes of the log.
+   * Returns the raw compressed descriptor bytes of the log.
+   *
+   * For access to the log's decompressed bytes of
+   * use method {@code decompressedByteStream}.
*
* @since 2.2.0
*/
@@ -25,6 +38,13 @@ public interface LogDescriptor extends Descriptor {
   public byte[] getRawDescriptorBytes();
 
   /**
+   * Returns the decompressed raw descriptor bytes of the log as stream.
+   *
+   * @since 2.2.0
+   */
+  public InputStream decompressedByteStream() throws DescriptorParseException;
+
+  /**
* Returns annotations found in the log file, which may be an empty List if a
* log format does not support adding annotations.
*
diff --git a/src/main/java/org/torproject/descriptor/internal/FileType.java 
b/src/main/java/org/torproject/descriptor/internal/FileType.java
index 353f0bb..2c07df6 100644
--- a/src/main/java/org/torproject/descriptor/internal/FileType.java
+++ b/src/main/java/org/torproject/descriptor/internal/FileType.java
@@ -93,6 +93,24 @@ public enum FileType {
   }
 
   /**
+   * Compresses the given InputStream and returns an OutputStream.
+   *
+   * @since 2.2.0
+   */
+  public OutputStream compress(OutputStream os) throws Exception {
+return this.outputStream(os);
+  }
+
+  /**
+   * Decompresses the given InputStream and returns an OutputStream.
+   *
+   * @since 2.2.0
+   */
+  public InputStream decompress(InputStream is) throws Exception {
+return this.inputStream(is);
+  }
+
+  /**
* Decompresses the given bytes in memory and returns the decompressed bytes.
*
* @since 2.2.0
diff --git a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java 
b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
index 97854e4..3583d26 100644
--- a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
@@ -14,10 +14,10 @@ import org.slf4j.LoggerFactory;
 import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -76,8 +76,7 @@ public abstract class LogDescriptorImpl
   this.fileType = FileType.findType(mat.group(1).toUpperCase());
   if (FileType.PLAIN == this.fileType) {
 this.fileType = defaultCompression;
-  } else {
-this.logBytes = this.fileType.decompress(this.logBytes);
+this.logBytes = this.fileType.compress(this.logBytes);
   }
 } catch 

[tor-commits] [metrics-lib/release] Prepare for 2.2.0 release, again.

2018-02-26 Thread karsten
commit 72f62fbe48d7cbb2de6d7fd5fd38b867fb0b185e
Author: Karsten Loesing 
Date:   Mon Feb 26 14:12:31 2018 +0100

Prepare for 2.2.0 release, again.
---
 CERT | 20 ++--
 CHANGELOG.md |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/CERT b/CERT
index 0f90f7a..43be56e 100644
--- a/CERT
+++ b/CERT
@@ -1,8 +1,8 @@
 -BEGIN CERTIFICATE-
-MIIDaTCCAlGgAwIBAgIEM/uNRzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
+MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
 UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU
-b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw
-NTE2MTQ0NDAwWhcNMTcwODE0MTQ0NDAwWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
+b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw
+MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
 CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj
 dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB
 AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU
@@ -11,11 +11,11 @@ 
Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE
 MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD
 oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX
 lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud
-DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEACxUk
-6eZkayjJGZjw02bWjrkwwe/N5iOnOz9XcpGQY5lUEk12uIYCYHDB66SsZngsbwNQ
-qZ8T6Avm9XSrKAKMxBcPsjTUy0Fb7yG3jLRypPlbgoe61YoliYYaWiaV1OQY1nsZ
-ujt6f7T5bwsLeh7La7+kB1SGDu+hKH7Bi3RtVN09iL7x+BJVuJOKHKYoK+UO2fB3
-ltFenkGpiDzGQQxJRzA1WbCQsSPrE6Qi3NmWXsTAc+24Y9gTO8qlmfi6zp1W7TGO
-+cUmHIO0u3+ewkb8oIj8jjzGTmEbuJrwgBOohQr6CXCicQHhIrBF6Opi7mF/UHSO
-J8d+DLvT41EL9JrgCg==
+DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU
+qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x
+RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq
+MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG
+qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW
+2DiJiA8BLxZToe2XDA==
 -END CERTIFICATE-
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91f00b8..4d87ba4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# Changes in version 2.2.0 - 2018-01-31
+# Changes in version 2.2.0 - 2018-02-26
 
  * Major changes
- Add new descriptor type WebServerAccessLog to parse web server

___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [metrics-lib/release] Add log line interfaces and access methods.

2018-02-26 Thread karsten
commit 59689a9fa4c162378f347902eb68e4c21ccf0043
Author: iwakeh 
Date:   Tue Feb 6 14:59:05 2018 +

Add log line interfaces and access methods.

For both the general LogDescriptor and extension WebServerAccessLog.
Include some new tests.
---
 .../org/torproject/descriptor/LogDescriptor.java   | 12 ++
 .../torproject/descriptor/{log => }/Method.java|  5 ++--
 .../torproject/descriptor/WebServerAccessLog.java  | 28 ++
 .../descriptor/log/WebServerAccessLogImpl.java | 20 
 .../descriptor/log/WebServerAccessLogLine.java | 15 ++--
 .../descriptor/log/LogDescriptorTest.java  | 15 +++-
 6 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java 
b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index ff02cae..6a6bf84 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -43,5 +43,17 @@ public interface LogDescriptor extends Descriptor {
   @Override
   public List getUnrecognizedLines();
 
+  /**
+   * Returns a list of all parseable log lines.
+   * Might require a lot of memory depending on log size.
+   */
+  public List logLines() throws DescriptorParseException;
+
+  public interface Line {
+
+/** Returns a log line string. */
+public String toLogString();
+
+  }
 }
 
diff --git a/src/main/java/org/torproject/descriptor/log/Method.java 
b/src/main/java/org/torproject/descriptor/Method.java
similarity index 50%
rename from src/main/java/org/torproject/descriptor/log/Method.java
rename to src/main/java/org/torproject/descriptor/Method.java
index c29d495..9135fe2 100644
--- a/src/main/java/org/torproject/descriptor/log/Method.java
+++ b/src/main/java/org/torproject/descriptor/Method.java
@@ -1,8 +1,9 @@
 /* Copyright 2018 The Tor Project
  * See LICENSE for licensing information */
 
-package org.torproject.descriptor.log;
+package org.torproject.descriptor;
 
-public  enum Method {
+/** Enum for web server access log methods. */
+public enum Method {
 GET, HEAD, POST;
 }
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java 
b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b94bc30..b4f1940 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
 
 import java.time.LocalDate;
 import java.util.List;
+import java.util.Optional;
 
 /**
  * Contains a sanitized web server access log file from a {@code 
torproject.org}
@@ -61,5 +62,32 @@ public interface WebServerAccessLog extends LogDescriptor {
   @Override
   public List getUnrecognizedLines();
 
+  public interface Line extends LogDescriptor.Line {
+
+/** Returns the IP address of the requesting host. */
+public String getIp();
+
+/** Returns the HTTP method, e.g., GET. */
+public Method getMethod();
+
+/** Returns the protocol and version, e.g., HTTP/1.1. */
+public String getProtocol();
+
+/** Returns the requested resource. */
+public String getRequest();
+
+/** Returns the size of the response in bytes, if available. */
+public Optional getSize();
+
+/** Returns the final status code, e.g., 200. */
+public int getResponse();
+
+/** Returns the date when the request was received. */
+public LocalDate getDate();
+
+/** True, if this is a valid web server access log line. */
+public boolean isValid();
+  }
+
 }
 
diff --git 
a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java 
b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index f02b1d7..7b56528 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -10,12 +10,17 @@ import org.torproject.descriptor.internal.FileType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.InputStreamReader;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
 import java.util.Collection;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 /**
  * Implementation of web server access log descriptors.
@@ -126,5 +131,20 @@ public class WebServerAccessLogImpl extends 
LogDescriptorImpl
 return this.logDate;
   }
 
+  /** Returns a list of all valid log lines. */
+  @Override
+  public List logLines()
+  throws DescriptorParseException {
+try (BufferedReader br
+= new BufferedReader(new InputStreamReader(new ByteArrayInputStream(
+this.getRawDescriptorBytes() {
+  return br.lines().map(line
+   

[tor-commits] [metrics-lib/master] Prepare for 2.2.0 release, again.

2018-02-26 Thread karsten
commit 72f62fbe48d7cbb2de6d7fd5fd38b867fb0b185e
Author: Karsten Loesing 
Date:   Mon Feb 26 14:12:31 2018 +0100

Prepare for 2.2.0 release, again.
---
 CERT | 20 ++--
 CHANGELOG.md |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/CERT b/CERT
index 0f90f7a..43be56e 100644
--- a/CERT
+++ b/CERT
@@ -1,8 +1,8 @@
 -BEGIN CERTIFICATE-
-MIIDaTCCAlGgAwIBAgIEM/uNRzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
+MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
 UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU
-b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw
-NTE2MTQ0NDAwWhcNMTcwODE0MTQ0NDAwWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
+b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw
+MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
 CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj
 dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB
 AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU
@@ -11,11 +11,11 @@ 
Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE
 MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD
 oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX
 lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud
-DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEACxUk
-6eZkayjJGZjw02bWjrkwwe/N5iOnOz9XcpGQY5lUEk12uIYCYHDB66SsZngsbwNQ
-qZ8T6Avm9XSrKAKMxBcPsjTUy0Fb7yG3jLRypPlbgoe61YoliYYaWiaV1OQY1nsZ
-ujt6f7T5bwsLeh7La7+kB1SGDu+hKH7Bi3RtVN09iL7x+BJVuJOKHKYoK+UO2fB3
-ltFenkGpiDzGQQxJRzA1WbCQsSPrE6Qi3NmWXsTAc+24Y9gTO8qlmfi6zp1W7TGO
-+cUmHIO0u3+ewkb8oIj8jjzGTmEbuJrwgBOohQr6CXCicQHhIrBF6Opi7mF/UHSO
-J8d+DLvT41EL9JrgCg==
+DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU
+qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x
+RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq
+MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG
+qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW
+2DiJiA8BLxZToe2XDA==
 -END CERTIFICATE-
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91f00b8..4d87ba4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# Changes in version 2.2.0 - 2018-01-31
+# Changes in version 2.2.0 - 2018-02-26
 
  * Major changes
- Add new descriptor type WebServerAccessLog to parse web server



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [metrics-lib/master] Bump version to 2.2.0-dev.

2018-02-26 Thread karsten
commit 7648780844f809f3068d3e625973fe43c78ccbb3
Author: Karsten Loesing 
Date:   Mon Feb 26 16:24:25 2018 +0100

Bump version to 2.2.0-dev.
---
 build.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.xml b/build.xml
index 3f081f7..199ba7a 100644
--- a/build.xml
+++ b/build.xml
@@ -6,7 +6,7 @@
 
 
 
-  
+  
   
   
   

___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/master] Bump version to 1.5.0-dev.

2018-02-26 Thread karsten
commit ef1dfb6d32d5bb42a95052cd44790b6c586f38c9
Author: Karsten Loesing 
Date:   Mon Feb 26 16:24:49 2018 +0100

Bump version to 1.5.0-dev.
---
 build.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.xml b/build.xml
index ff8302b..fb219fa 100644
--- a/build.xml
+++ b/build.xml
@@ -8,7 +8,7 @@
 
   
   
-  
+  
   
   
   

___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/master] Prepare for 1.5.0 release.

2018-02-26 Thread karsten
commit ddfa7bad243dc8e0a3105b14418794c87786e88f
Author: Karsten Loesing 
Date:   Mon Feb 26 14:23:05 2018 +0100

Prepare for 1.5.0 release.
---
 CERT | 20 ++--
 CHANGELOG.md |  2 +-
 build.xml|  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/CERT b/CERT
index d94c0c3..43be56e 100644
--- a/CERT
+++ b/CERT
@@ -1,8 +1,8 @@
 -BEGIN CERTIFICATE-
-MIIDaTCCAlGgAwIBAgIEZTniETANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
+MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
 UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU
-b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw
-ODE3MTg1MDQ0WhcNMTcxMTE1MTg1MDQ0WjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
+b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw
+MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
 CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj
 dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB
 AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU
@@ -11,11 +11,11 @@ 
Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE
 MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD
 oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX
 lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud
-DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAbsAc
-gwl5KJH3pVKw4b+ACCOMgW+27MisCFbT1Izq2Wx+JcLMt3N//MoIpYOZWhsIeazW
-/NE0fNbkLi0IYA0F1nUC9pHl44Hd8Gjfqa/YQUi9ALtgsY7l6W0sceW8WnZ8bu8J
-DfrqnmB0bD2xc9ZjOn58al8dVjVWs95M87D9WCRU6LiaKFj5c45wciABQsTmC0qD
-pyHYOaSGtXxXKDw5pAntdtHkCbowV5tDi/QQ8Tg7i5O7xwSh71Q7TZiNFMpLomBL
-QllHfTZryFmoHyGn5MfngBUVCVHig5nXmk0dUMGuLiK4789dkgiPRz0vpB5Yf8Yy
-CCE2jB6VBi2g5fMx0w==
+DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU
+qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x
+RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq
+MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG
+qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW
+2DiJiA8BLxZToe2XDA==
 -END CERTIFICATE-
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a0b5d1f..eb14839 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# Changes in version 1.5.0 - 2018-01-31
+# Changes in version 1.5.0 - 2018-02-26
 
  * Major changes
- Update to metrics-lib 2.2.0.
diff --git a/build.xml b/build.xml
index 48f6e33..ff8302b 100644
--- a/build.xml
+++ b/build.xml
@@ -8,7 +8,7 @@
 
   
   
-  
+  
   
   
   



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [collector/master] Circumvent Collection (integer) size limit.

2018-02-26 Thread karsten
commit d05b4e4aee3bc15c3e4d5bac660dfcee5bc26279
Author: iwakeh 
Date:   Tue Feb 20 16:30:14 2018 +

Circumvent Collection (integer) size limit.

Clean log lines immediately when they are read and also make use of the
sanitized logs' high redundancy immediately, i.e., continue with maps of
<LocalDate, Map<String, Long>>.

Rename method(s) to reflect what they do.
---
 .../collector/webstats/SanitizeWeblogs.java| 89 --
 1 file changed, 65 insertions(+), 24 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 1f2e922..5a270dd 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -4,8 +4,10 @@
 package org.torproject.collector.webstats;
 
 import static java.util.stream.Collectors.counting;
+import static java.util.stream.Collectors.groupingBy;
 import static java.util.stream.Collectors.groupingByConcurrent;
-import static java.util.stream.Collectors.toList;
+import static java.util.stream.Collectors.reducing;
+import static java.util.stream.Collectors.summingLong;
 
 import org.torproject.collector.conf.Configuration;
 import org.torproject.collector.conf.ConfigurationException;
@@ -35,6 +37,8 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -111,35 +115,36 @@ public class SanitizeWeblogs extends CollecTorMain {
   : virtualEntry.getValue().entrySet()) {
 String physicalHost = physicalEntry.getKey();
 log.info("Processing logs for {} on {}.", virtualHost, physicalHost);
-Map linesByDate
+Map> linesByDate
 = physicalEntry.getValue().values().stream().parallel()
-.flatMap((LogMetadata metadata) -> lineStream(metadata)
-   .filter((line) -> line.isValid())).parallel()
-.collect(groupingByConcurrent(WebServerAccessLogLine::getDate));
+.flatMap(metadata -> sanitzedLineStream(metadata).entrySet()
+.stream())
+.collect(groupingBy(Map.Entry::getKey,
+  reducing(Collections.emptyMap(), Map.Entry::getValue,
+(e1, e2) -> Stream.concat(e1.entrySet().stream(), e2.entrySet()
+  .stream())
+  .collect(groupingByConcurrent(Map.Entry::getKey,
+  summingLong(Map.Entry::getValue));
 LocalDate[] interval = determineInterval(linesByDate.keySet());
 linesByDate.entrySet().stream()
 .filter((entry) -> entry.getKey().isAfter(interval[0])
   && entry.getKey().isBefore(interval[1])).parallel()
-.forEach((entry) -> storeSanitized(virtualHost, physicalHost,
+.forEach((entry) -> storeSortedAndForget(virtualHost, physicalHost,
   entry.getKey(), entry.getValue()));
   }
 }
   }
 
-  private void storeSanitized(String virtualHost, String physicalHost,
-  LocalDate date, List lines) {
+  private void storeSortedAndForget(String virtualHost, String physicalHost,
+  LocalDate date, Map lineCounts) {
 String name = new StringJoiner(InternalLogDescriptor.SEP)
 .add(virtualHost).add(physicalHost)
 .add(InternalWebServerAccessLog.MARKER)
 .add(date.format(DateTimeFormatter.BASIC_ISO_DATE))
 .toString() + "." + FileType.XZ.name().toLowerCase();
-log.debug("Sanitizing {}.", name);
-Map retainedLines = new TreeMap<>(lines
-.stream().parallel().map((line) -> sanitize(line, date))
-.filter((line) -> line.isPresent())
-.map((line) -> line.get())
-.collect(groupingByConcurrent(line -> line, counting(;
-lines.clear(); // not needed anymore
+log.debug("Storing {}.", name);
+Map retainedLines = new TreeMap<>(lineCounts);
+lineCounts.clear(); // not needed anymore
 try {
   WebServerAccessLogPersistence walp
   = new WebServerAccessLogPersistence(
@@ -187,8 +192,8 @@ public class SanitizeWeblogs extends CollecTorMain {
 .collect(Collectors.joining("\n", "", "\n")).getBytes();
   }
 
-  static Optional sanitize(WebServerAccessLogLine logLine,
-  LocalDate date) {
+  static Optional
+  sanitize(WebServerAccessLogLine logLine) {
 if (!logLine.isValid()
 || !(Method.GET == logLine.getMethod()
  || Method.HEAD == logLine.getMethod())
@@ -203,10 +208,13 @@ public class SanitizeWeblogs extends CollecTorMain {
 if (queryStart > 0) {
   logLine.setRequest(logLine.getRequest().substring(0, queryStart));
 }
-  

[tor-commits] [collector/master] Reduce memory footprint and wall time.

2018-02-26 Thread karsten
commit 8557bf6255e6e3745088033e8e7bad7801421686
Author: iwakeh 
Date:   Tue Feb 20 16:30:09 2018 +

Reduce memory footprint and wall time.

Adapt to latest changes of metrics-lib (task-25329) and make use of the high
redundancy of logs (e.g. a 3G file might only contain 350 different lines).
This avoids OOM and array out of bounds exceptions for large files (>2G) and
gives a speed-up of roughly 50%. (The earlier 66min are down to 34min for
meronense files plus two larger files.)

There is a BATCH constant, which could be tuned for processing speed. It is
logged for each webstats module run.  Currently, it is set to 100k.  This
was more or less arbitrarily chosen and used for all the tests.  A test run
using 500k didn't show significant differences.
---
 .../persist/WebServerAccessLogPersistence.java |  8 ---
 .../collector/webstats/SanitizeWeblogs.java| 61 ++
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git 
a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
 
b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
index 792d3a9..dab4112 100644
--- 
a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
+++ 
b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
@@ -5,7 +5,6 @@ package org.torproject.collector.persist;
 
 import org.torproject.descriptor.WebServerAccessLog;
 import org.torproject.descriptor.internal.FileType;
-import org.torproject.descriptor.log.InternalLogDescriptor;
 import org.torproject.descriptor.log.InternalWebServerAccessLog;
 
 import org.slf4j.Logger;
@@ -30,13 +29,6 @@ public class WebServerAccessLogPersistence
   /** Prepare storing the given descriptor. */
   public WebServerAccessLogPersistence(WebServerAccessLog desc) {
 super(desc, new byte[0]);
-byte[] compressedBytes = null;
-try { // The descriptor bytes have to be stored compressed.
-  compressedBytes = COMPRESSION.compress(desc.getRawDescriptorBytes());
-  ((InternalLogDescriptor)desc).setRawDescriptorBytes(compressedBytes);
-} catch (Exception ex) {
-  log.warn("Cannot compress ’{}’.  Storing uncompressed.", ex);
-}
 calculatePaths();
   }
 
diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 7601898..1f2e922 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -3,6 +3,7 @@
 
 package org.torproject.collector.webstats;
 
+import static java.util.stream.Collectors.counting;
 import static java.util.stream.Collectors.groupingByConcurrent;
 import static java.util.stream.Collectors.toList;
 
@@ -17,6 +18,7 @@ import 
org.torproject.collector.persist.WebServerAccessLogPersistence;
 import org.torproject.descriptor.DescriptorParseException;
 import org.torproject.descriptor.Method;
 import org.torproject.descriptor.WebServerAccessLog;
+import org.torproject.descriptor.internal.FileType;
 import org.torproject.descriptor.log.InternalLogDescriptor;
 import org.torproject.descriptor.log.InternalWebServerAccessLog;
 import org.torproject.descriptor.log.WebServerAccessLogImpl;
@@ -26,8 +28,9 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.InputStreamReader;
+import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.LocalDate;
@@ -40,6 +43,7 @@ import java.util.SortedSet;
 import java.util.StringJoiner;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 /**
@@ -87,6 +91,7 @@ public class SanitizeWeblogs extends CollecTorMain {
   Set sources = this.config.getSourceTypeSet(
   Key.WebstatsSources);
   if (sources.contains(SourceType.Local)) {
+log.info("Processing logs using batch value {}.", BATCH);
 findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins));
 PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath));
   }
@@ -126,24 +131,60 @@ public class SanitizeWeblogs extends CollecTorMain {
 String name = new StringJoiner(InternalLogDescriptor.SEP)
 .add(virtualHost).add(physicalHost)
 .add(InternalWebServerAccessLog.MARKER)
-.add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString();
+.add(date.format(DateTimeFormatter.BASIC_ISO_DATE))
+.toString() + "." + FileType.XZ.name().toLowerCase();
 log.debug("Sanitizing {}.", name);
-List retainedLines = lines
+Map retainedLines = new TreeMap<>(lines
 .stream().parallel().map((line) -> 

[tor-commits] [collector/master] Adapt CollecTor to latest metrics-lib master branch.

2018-02-26 Thread karsten
commit fbb35f75da022a23912b937b1825d8f216abad07
Author: iwakeh 
Date:   Tue Feb 20 16:30:08 2018 +

Adapt CollecTor to latest metrics-lib master branch.
---
 src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java 
b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 53ad118..7601898 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -15,10 +15,10 @@ import org.torproject.collector.cron.CollecTorMain;
 import org.torproject.collector.persist.PersistenceUtils;
 import org.torproject.collector.persist.WebServerAccessLogPersistence;
 import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.Method;
 import org.torproject.descriptor.WebServerAccessLog;
 import org.torproject.descriptor.log.InternalLogDescriptor;
 import org.torproject.descriptor.log.InternalWebServerAccessLog;
-import org.torproject.descriptor.log.Method;
 import org.torproject.descriptor.log.WebServerAccessLogImpl;
 import org.torproject.descriptor.log.WebServerAccessLogLine;
 



___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits


[tor-commits] [metrics-lib/master] Accommodate logs with more than Integer.MAX_VALUE lines.

2018-02-26 Thread karsten
commit 35feb816f81f26bcc9dc035a1aaf496c34a86647
Author: iwakeh 
Date:   Fri Feb 16 09:05:46 2018 +

Accommodate logs with more than Integer.MAX_VALUE lines.

Implements task-23046.
---
 .../org/torproject/descriptor/LogDescriptor.java   | 10 +--
 .../torproject/descriptor/WebServerAccessLog.java  |  6 
 .../descriptor/log/WebServerAccessLogImpl.java | 32 ++
 .../descriptor/log/LogDescriptorTest.java  |  5 +++-
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java 
b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index 826fcda..8dd8460 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
 
 import java.io.InputStream;
 import java.util.List;
+import java.util.stream.Stream;
 
 /**
  * Contains a log file.
@@ -64,11 +65,14 @@ public interface LogDescriptor extends Descriptor {
   public List getUnrecognizedLines();
 
   /**
-   * Returns a list of all parseable log lines.
-   * Might require a lot of memory depending on log size.
+   * Returns a stream of all parseable log lines.
+   * Depending on log size this might not fit into a collection type.
+   *
+   * @since 2.2.0
*/
-  public List logLines() throws DescriptorParseException;
+  public Stream logLines() throws DescriptorParseException;
 
+  /** Base interface for accessing log lines. */
   public interface Line {
 
 /** Returns a log line string. */
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java 
b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b4f1940..5f3ad73 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -62,6 +62,12 @@ public interface WebServerAccessLog extends LogDescriptor {
   @Override
   public List getUnrecognizedLines();
 
+  /**
+   * Facilitates access to all log line fields that don't only contain
+   * default values post sanitization.
+   *
+   * @since 2.2.0
+   */
   public interface Line extends LogDescriptor.Line {
 
 /** Returns the IP address of the requesting host. */
diff --git 
a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java 
b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index e48a262..3666d5d 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -15,10 +15,11 @@ import java.io.File;
 import java.io.InputStreamReader;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Implementation of web server access log descriptors.
@@ -128,15 +129,34 @@ public class WebServerAccessLogImpl extends 
LogDescriptorImpl
 return this.logDate;
   }
 
-  /** Returns a list of all valid log lines. */
+  private static final int LISTLIMIT = Integer.MAX_VALUE / 2;
+
+  /** Returns a stream of all valid log lines. */
   @Override
-  public List logLines()
+  public Stream logLines()
   throws DescriptorParseException {
 try (BufferedReader br = new BufferedReader(new InputStreamReader(
 this.decompressedByteStream( {
-  return br.lines().map(line
-  -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line))
-.filter(line -> line.isValid()).collect(Collectors.toList());
+  List lists = new ArrayList<>();
+  List currentList = new ArrayList<>();
+  lists.add(currentList);
+  String lineStr = br.readLine();
+  int count = 0;
+  while (null != lineStr) {
+WebServerAccessLogLine wsal = WebServerAccessLogLine.makeLine(lineStr);
+if (wsal.isValid()) {
+  currentList.add(wsal);
+  count++;
+}
+if (count >= LISTLIMIT) {
+  currentList = new ArrayList<>();
+  lists.add(currentList);
+  count = 0;
+}
+lineStr = br.readLine();
+  }
+  br.close();
+  return lists.stream().flatMap(list -> list.stream());
 } catch (Exception ex) {
   throw new DescriptorParseException("Cannot retrieve log lines.", ex);
 }
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java 
b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index 67ba638..0ff3e62 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -3,6 +3,8 @@
 
 package org.torproject.descriptor.log;
 
+import static java.util.stream.Collectors.toList;

[tor-commits] [metrics-lib/master] Enable handling of larger (> 2G) log files.

2018-02-26 Thread karsten
commit c01dfbdc4e5d817fb10fb45f79d8223853cdeac0
Author: iwakeh 
Date:   Fri Feb 16 09:05:46 2018 +

Enable handling of larger (> 2G) log files.

As log files can be compressed very efficiently, log descriptor raw bytes
contain the compressed bytes.

Added methods for accessing uncompressed log content, as well as
stream-based methods for decompression and compression in class FileType.
Adapted all tests to the changes.

Implements task-25329.
---
 .../org/torproject/descriptor/LogDescriptor.java   | 26 --
 .../torproject/descriptor/internal/FileType.java   | 18 ++
 .../descriptor/log/LogDescriptorImpl.java  | 25 --
 .../descriptor/log/WebServerAccessLogImpl.java | 12 +++
 .../descriptor/log/LogDescriptorTest.java  | 40 +++---
 .../descriptor/log/WebServerModuleTest.java|  4 ++-
 6 files changed, 82 insertions(+), 43 deletions(-)

diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java 
b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index 6a6bf84..826fcda 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -3,21 +3,34 @@
 
 package org.torproject.descriptor;
 
+import java.io.InputStream;
 import java.util.List;
 
 /**
  * Contains a log file.
  *
  * Unlike other descriptors, logs can get very large and are typically 
stored
- * on disk in compressed form. However, all access to log contents through this
- * interface and its subinterfaces is made available in uncompressed form.
+ * on disk in compressed form. Access to log contents through this
+ * interface and its subinterfaces is made available in compressed and
+ * decompressed form:
+ * 
+ * The raw descriptor bytes are compressed, because logs contain
+ * often redundant information that can achieve high compression rates.
+ * For example, a 500kB compressed log file might be deflated to 3GB.
+ * The uncompressed log contents can be accessed as a stream of bytes.
+ * A list of log lines (decompressed) can be retrieved.
+ * 
+ * 
  *
  * @since 2.2.0
  */
 public interface LogDescriptor extends Descriptor {
 
   /**
-   * Returns the decompressed raw descriptor bytes of the log.
+   * Returns the raw compressed descriptor bytes of the log.
+   *
+   * For access to the log's decompressed bytes of
+   * use method {@code decompressedByteStream}.
*
* @since 2.2.0
*/
@@ -25,6 +38,13 @@ public interface LogDescriptor extends Descriptor {
   public byte[] getRawDescriptorBytes();
 
   /**
+   * Returns the decompressed raw descriptor bytes of the log as stream.
+   *
+   * @since 2.2.0
+   */
+  public InputStream decompressedByteStream() throws DescriptorParseException;
+
+  /**
* Returns annotations found in the log file, which may be an empty List if a
* log format does not support adding annotations.
*
diff --git a/src/main/java/org/torproject/descriptor/internal/FileType.java 
b/src/main/java/org/torproject/descriptor/internal/FileType.java
index 353f0bb..2c07df6 100644
--- a/src/main/java/org/torproject/descriptor/internal/FileType.java
+++ b/src/main/java/org/torproject/descriptor/internal/FileType.java
@@ -93,6 +93,24 @@ public enum FileType {
   }
 
   /**
+   * Compresses the given InputStream and returns an OutputStream.
+   *
+   * @since 2.2.0
+   */
+  public OutputStream compress(OutputStream os) throws Exception {
+return this.outputStream(os);
+  }
+
+  /**
+   * Decompresses the given InputStream and returns an OutputStream.
+   *
+   * @since 2.2.0
+   */
+  public InputStream decompress(InputStream is) throws Exception {
+return this.inputStream(is);
+  }
+
+  /**
* Decompresses the given bytes in memory and returns the decompressed bytes.
*
* @since 2.2.0
diff --git a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java 
b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
index 97854e4..3583d26 100644
--- a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
@@ -14,10 +14,10 @@ import org.slf4j.LoggerFactory;
 import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -76,8 +76,7 @@ public abstract class LogDescriptorImpl
   this.fileType = FileType.findType(mat.group(1).toUpperCase());
   if (FileType.PLAIN == this.fileType) {
 this.fileType = defaultCompression;
-  } else {
-this.logBytes = this.fileType.decompress(this.logBytes);
+this.logBytes = this.fileType.compress(this.logBytes);
   }
 } catch 

[tor-commits] [tor-browser-build/master] Bug 25336: Bump obfs4proxy to 0.0.7

2018-02-26 Thread gk
commit 4bed9a85478b6fb16e0d654589d8cb8ed3865027
Author: Georg Koppen 
Date:   Mon Feb 26 08:26:20 2018 +

Bug 25336: Bump obfs4proxy to 0.0.7

0.0.7 has long been out and OnionShare needs its meek_lite feature
for macOS. Let's update to this latest stable obfs4 version then.
---
 projects/obfs4/config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/obfs4/config b/projects/obfs4/config
index 44db79d..916887f 100644
--- a/projects/obfs4/config
+++ b/projects/obfs4/config
@@ -1,5 +1,5 @@
 # vim: filetype=yaml sw=2
-version: 0.0.5
+version: 0.0.7
 git_url: https://git.torproject.org/pluggable-transports/obfs4.git
 git_hash: 'obfs4proxy-[% c("version") %]'
 tag_gpg_id: 1

___
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits