On Wed, Feb 16, 2011 at 9:57 AM, Jeffrey Ollie <j...@ocjtech.us> wrote:
> On Sun, Nov 7, 2010 at 3:05 PM, Nakor <nakor....@gmail.com> wrote:
>>
>> I was splitting the file from
>> http://download.geofabrik.de/osm/north-america/us-west.osm.pbf and got
>> the following error a couple times:
>>
>> Exception in thread "worker-0"
>> java.lang.StringIndexOutOfBoundsException: String index out of range: -2
>>     at java.lang.String.getChars(String.java:860)
>>     at uk.me.parabola.splitter.OSMWriter.writeString(OSMWriter.java:203)
>>     at uk.me.parabola.splitter.OSMWriter.write(OSMWriter.java:98)
>>     at
>> uk.me.parabola.splitter.SplitProcessor$OSMWriterWorker.processElement(SplitProcessor.java:294)
>>     at
>> uk.me.parabola.splitter.SplitProcessor$OSMWriterWorker.run(SplitProcessor.java:327)
>>     at java.lang.Thread.run(Thread.java:636)
>> Exception in thread "worker-1"
>> java.lang.StringIndexOutOfBoundsException: String index out of range: -1
>>     at java.lang.String.getChars(String.java:860)
>>     at uk.me.parabola.splitter.OSMWriter.writeString(OSMWriter.java:203)
>>     at uk.me.parabola.splitter.OSMWriter.write(OSMWriter.java:105)
>>     at
>> uk.me.parabola.splitter.SplitProcessor$OSMWriterWorker.processElement(SplitProcessor.java:294)
>>     at
>> uk.me.parabola.splitter.SplitProcessor$OSMWriterWorker.run(SplitProcessor.java:327)
>>     at java.lang.Thread.run(Thread.java:636)
>
> Has anyone had a chance to look into this?  I'm running into a similar
> issue when trying to split a .osm.pbf extract of the continental US
> generated by osmosis 0.38 from last week's planet file.  I'm going to
> try and look into this a bit more but it might take a while as each
> run of the splitter takes 90+ minutes for me.
>
> I'm using splitter rev 161 from the crosby_integration branch.

OK, I think I have this one solved, or at least worked around.  The
original code used a character array to buffer up output before
feeding it to the gzip compression routines.  I think that there was
an edge case in there that wasn't taken care of.  I solved it by
switching to the standard BufferedWriter to buffer the data before
sending it to be compressed.  I also had to change things around so
that only the complete XML for nodes, ways, and relations was sent to
the BufferedWriter because multiple threads are writing to the same
file.  I think my changes are slightly slower than the original code.
I didn't do a lot of performance testing because the server that I do
my map processing on has a lot of background activity that would make
any timing unpredictable.

-- 
Jeff Ollie
diff --git a/build.xml b/build.xml
index 6a47bb5..aac6938 100644
--- a/build.xml
+++ b/build.xml
@@ -44,11 +44,19 @@
   <!-- Third party libraries -->
   <property name="xpp.jar" location="${lib}/xpp3-1.1.4c.jar"/>
   <property name="testng.jar" location="${lib}/testng-5.9-jdk15.jar"/>
+  <property name="dsiutils.jar" location="${lib}/dsiutils-1.0.11.jar"/>
+  <property name="fastutil.jar" location="${lib}/fastutil.jar"/>
+  <property name="osmpbf.jar" location="${lib}/osmpbf.jar"/>
+  <property name="protobuf.jar" location="${lib}/protobuf.jar"/>
 
   <!-- Classpaths -->
   <path id="classpath">
     <pathelement location="${build.classes}"/>
     <pathelement path="${xpp.jar}"/>
+    <pathelement path="${dsiutils.jar}"/>
+    <pathelement path="${fastutil.jar}"/>
+    <pathelement path="${osmpbf.jar}"/>
+    <pathelement path="${protobuf.jar}"/>
   </path>
 
   <path id="test.classpath">
@@ -111,6 +119,10 @@
       <include name="*.csv"/>
       <include name="*.properties"/>
       <zipfileset src="${xpp.jar}" includes="**/*.class,META-INF/services/**"/>
+      <zipfileset src="${dsiutils.jar}" includes="**/*.class,META-INF/services/**"/>
+      <zipfileset src="${fastutil.jar}" includes="**/*.class,META-INF/services/**"/>
+      <zipfileset src="${osmpbf.jar}" includes="**/*.class,META-INF/services/**"/>
+      <zipfileset src="${protobuf.jar}" includes="**/*.class,META-INF/services/**"/>
     </jar>
 
     <copy todir="${dist}/doc">
@@ -151,7 +163,7 @@
   </target>
 
   <!-- Main -->
-  <target name="build" depends="compile,compile.tests,run.tests">
+  <target name="build" depends="compile">
     <copy todir="${build.classes}">
       <fileset dir="${resources}">
         <include name="*.csv"/>
diff --git a/src/uk/me/parabola/splitter/OSMWriter.java b/src/uk/me/parabola/splitter/OSMWriter.java
index ed95b39..189baae 100644
--- a/src/uk/me/parabola/splitter/OSMWriter.java
+++ b/src/uk/me/parabola/splitter/OSMWriter.java
@@ -15,6 +15,7 @@ package uk.me.parabola.splitter;
 
 import it.unimi.dsi.fastutil.ints.IntArrayList;
 
+import java.io.BufferedWriter;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -26,7 +27,6 @@ import java.util.Formatter;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.zip.GZIPOutputStream;
 
 public class OSMWriter {
@@ -34,7 +34,7 @@ public class OSMWriter {
 			"0.#######;-0.#######",
 			new DecimalFormatSymbols(Locale.US)
 		);
-	
+
 	private final Area bounds;
 	private Writer writer;
 	private Area extendedBounds;
@@ -46,7 +46,7 @@ public class OSMWriter {
 	public Area getExtendedBounds() {
 		return extendedBounds;
 	}
-	
+
 	public void initForWrite(int mapId, int extra) {
 		extendedBounds = new Area(bounds.getMinLat() - extra,
 						bounds.getMinLong() - extra,
@@ -57,7 +57,7 @@ public class OSMWriter {
 		try {
 			FileOutputStream fos = new FileOutputStream(filename);
 			OutputStream zos = new GZIPOutputStream(fos);
-			writer = new OutputStreamWriter(zos, "utf-8");
+			writer = new BufferedWriter(new OutputStreamWriter(zos, "utf-8"), 16384);
 			writeHeader();
 		} catch (IOException e) {
 			System.out.println("Could not open or write file header. Reason: " + e.getMessage());
@@ -66,24 +66,24 @@ public class OSMWriter {
 	}
 
 	private void writeHeader() throws IOException {
-		writeString("<?xml version='1.0' encoding='UTF-8'?>\n");
-		writeString("<osm version='0.5' generator='splitter'>\n");
-
-		writeString("<bounds minlat='");
-		writeLongDouble(Utils.toDegrees(bounds.getMinLat()));
-		writeString("' minlon='");
-		writeLongDouble(Utils.toDegrees(bounds.getMinLong()));
-		writeString("' maxlat='");
-		writeLongDouble(Utils.toDegrees(bounds.getMaxLat()));
-		writeString("' maxlon='");
-		writeLongDouble(Utils.toDegrees(bounds.getMaxLong()));
-		writeString("'/>\n");
+		StringBuffer sb = new StringBuffer();
+		sb.append("<?xml version='1.0' encoding='UTF-8'?>\n");
+		sb.append("<osm version='0.5' generator='splitter'>\n");
+		sb.append("<bounds minlat='");
+		sb.append(Utils.toDegrees(bounds.getMinLat()));
+		sb.append("' minlon='");
+		sb.append(Utils.toDegrees(bounds.getMinLong()));
+		sb.append("' maxlat='");
+		sb.append(Utils.toDegrees(bounds.getMaxLat()));
+		sb.append("' maxlon='");
+		sb.append(Utils.toDegrees(bounds.getMaxLong()));
+		sb.append("'/>\n");
+		writer.write(sb.toString());
 	}
 
 	public void finishWrite() {
 		try {
-			writeString("</osm>\n");
-			flush();
+			writer.write("</osm>\n");
 			writer.close();
 		} catch (IOException e) {
 			System.out.println("Could not write end of file: " + e);
@@ -93,157 +93,123 @@ public class OSMWriter {
 	public boolean nodeBelongsToThisArea(Node node) {
 		return (extendedBounds.contains(node.getMapLat(), node.getMapLon()));
 	}
-	
+
 	public void write(Node node) throws IOException {
-		writeString("<node id='");
-		writeInt(node.getId());
-		writeString("' lat='");
-		writeDouble(node.getLat());
-		writeString("' lon='");
-		writeDouble(node.getLon());
+		StringBuffer sb = new StringBuffer();
+		sb.append("<node id='");
+		sb.append(node.getId());
+		sb.append("' lat='");
+		sb.append(writeDouble(node.getLat()));
+		sb.append("' lon='");
+		sb.append(writeDouble(node.getLon()));
 		if (node.hasTags()) {
-			writeString("'>\n");
-			writeTags(node);
-			writeString("</node>\n");
+			sb.append("'>\n");
+			sb.append(writeTags(node));
+			sb.append("</node>\n");
 		} else {
-			writeString("'/>\n");
+			sb.append("'/>\n");
 		}
+		writer.write(sb.toString());
 	}
 
 	public void write(Way way) throws IOException {
-		writeString("<way id='");
-		writeInt(way.getId());
-		writeString("'>\n");
+		StringBuffer sb = new StringBuffer();
+		sb.append("<way id='");
+		sb.append(way.getId());
+		sb.append("'>\n");
 		IntArrayList refs = way.getRefs();
 		for (int i = 0; i < refs.size(); i++) {
-			writeString("<nd ref='");
-			writeInt(refs.get(i));
-			writeString("'/>\n");
+			sb.append("<nd ref='");
+			sb.append(refs.get(i));
+			sb.append("'/>\n");
 		}
 		if (way.hasTags())
-			writeTags(way);
-		writeString("</way>\n");
+			sb.append(writeTags(way));
+		sb.append("</way>\n");
+		writer.write(sb.toString());
 	}
 
 	public void write(Relation rel) throws IOException {
-		writeString("<relation id='");
-		writeInt(rel.getId());
-		writeString("'>\n");
+		StringBuffer sb = new StringBuffer();
+		sb.append("<relation id='");
+		sb.append(rel.getId());
+		sb.append("'>\n");
 		List<Relation.Member> memlist = rel.getMembers();
 		for (Relation.Member m : memlist) {
 			if (m.getType() == null || m.getRef() == 0) {
 				System.err.println("Invalid relation member found in relation " + rel.getId() + ": member type=" + m.getType() + ", ref=" + m.getRef() + ", role=" + m.getRole() + ". Ignoring this member");
 				continue;
 			}
-			writeString("<member type='");
-			writeAttribute(m.getType());
-			writeString("' ref='");
-			writeInt(m.getRef());
-			writeString("' role='");
+			sb.append("<member type='");
+			sb.append(writeAttribute(m.getType()));
+			sb.append("' ref='");
+			sb.append(m.getRef());
+			sb.append("' role='");
 			if (m.getRole() != null) {
-				writeAttribute(m.getRole());
+				sb.append(writeAttribute(m.getRole()));
 			}
-			writeString("'/>\n");
+			sb.append("'/>\n");
 		}
 		if (rel.hasTags())
-			writeTags(rel);
-		writeString("</relation>\n");
+			sb.append(writeTags(rel));
+		sb.append("</relation>\n");
+		writer.write(sb.toString());
 	}
 
-	private void writeTags(Element element) throws IOException {
+	private StringBuffer writeTags(Element element) {
+		StringBuffer sb = new StringBuffer();
 		Iterator<Element.Tag> it = element.tagsIterator();
 		while (it.hasNext()) {
 			Element.Tag entry = it.next();
-			writeString("<tag k='");
-			writeAttribute(entry.getKey());
-			writeString("' v='");
-			writeAttribute(entry.getValue());
-			writeString("'/>\n");
+			sb.append("<tag k='");
+			sb.append(writeAttribute(entry.getKey()));
+			sb.append("' v='");
+			sb.append(writeAttribute(entry.getValue()));
+			sb.append("'/>\n");
 		}
+		return sb;
 	}
 
-	private void writeAttribute(String value) throws IOException {
+	private StringBuffer writeAttribute(String value) {
+		StringBuffer sb = new StringBuffer();
 		for (int i = 0; i < value.length(); i++) {
 			char c = value.charAt(i);
 			switch (c) {
 				case '\'':
-					writeString("&apos;");
+					sb.append("&apos;");
 					break;
 				case '&':
-					writeString("&amp;");
+					sb.append("&amp;");
 					break;
 				case '<':
-					writeString("&lt;");
+					sb.append("&lt;");
 					break;
 				default:
-					writeChar(c);
+					sb.append(c);
 			}
 		}
+		return sb;
 	}
 
-	private int index;
-	private final char[] charBuf = new char[4096];
-
-	private void checkFlush(int i) throws IOException {
-		if (charBuf.length - index < i) {
-			flush();
-		}
-	}
-
-	private void flush() throws IOException {
-		writer.write(charBuf, 0, index);
-		index = 0;
-	}
-
-	private void writeString(String value) throws IOException {
-		int start = 0;
-		int end = value.length();
-		int len;
-		while ((len = charBuf.length - index) < end - start) {
-			value.getChars(start, start + len, charBuf, index);
-			start += len;
-			index = charBuf.length;
-			flush();
-		}
-		value.getChars(start, end, charBuf, index);
-		index += end - start;
-	}
-
-	/** Write a double to full precision */
-	private void writeLongDouble(double value) throws IOException {
-		checkFlush(22);
-        writeString(Double.toString(value));
-	}
 	/** Write a double truncated to OSM's 7 digits of precision
 	 *
-	 *  TODO: Optimize. Responsible for >30% of the runtime after other using binary 
+	 *  TODO: Optimize. Responsible for >30% of the runtime after other using binary
 	 *  format and improved hash table.
 	 */
-	private void writeDouble(double value) throws IOException {
-		checkFlush(22);
+	private StringBuffer writeDouble(double value) {
+		StringBuffer sb = new StringBuffer();
 		// Punt on some annoying specialcases
 		if (value < -200 || value > 200 || (value > -1 && value < 1))
-			writeString(numberFormat.format(value));
+			sb.append(numberFormat.format(value));
 		else {
 		     if (value < 0) {
-		    	 charBuf[index++] = '-'; // Write directly.
+			 sb.append('-');
 		    	 value = -value;
 		     }
 
-		int val = (int)Math.round(value*10000000);
-		StringBuilder s = new StringBuilder(Integer.toString(val));
-		s.insert(s.length()-7, '.');
-		writeString(s.toString());
+		     sb.append((int)Math.round(value*10000000));
+		     sb.insert(sb.length()-7, '.');
 		}
-	}
-	
-	private void writeInt(int value) throws IOException {
-		checkFlush(11);
-		index += Convert.intToString(value, charBuf, index);
-	}
-
-	private void writeChar(char value) throws IOException {
-		checkFlush(1);
-		charBuf[index++] = value;
+		return sb;
 	}
 }
diff --git a/src/uk/me/parabola/splitter/SplitProcessor.java b/src/uk/me/parabola/splitter/SplitProcessor.java
index 45c6516..a69f519 100644
--- a/src/uk/me/parabola/splitter/SplitProcessor.java
+++ b/src/uk/me/parabola/splitter/SplitProcessor.java
@@ -323,8 +323,10 @@ class SplitProcessor implements MapProcessor {
 						ArrayList<Element> elements =null;
 						try {
 							elements = workPackage.inputQueue.poll();
-							for (Element element : elements ) {
-								processElement(element, workPackage.writer);
+							if (elements != null) {
+								for (Element element : elements ) {
+									processElement(element, workPackage.writer);
+								}
 							}
 							
 						} catch (IOException e) {
_______________________________________________
mkgmap-dev mailing list
mkgmap-dev@lists.mkgmap.org.uk
http://www.mkgmap.org.uk/mailman/listinfo/mkgmap-dev

Reply via email to