Hi Gerd

Attached is version 3 of the patch.

The significant problem was the logic (in Mdr5) where change in sortKey
only was used to make unique lists, implying TERTIARY differences. The
related structures had been processed with a Collator with SECONDARY
strength. I've added an option to set the strength in the keys.

The other changes are in Mdr25.sortCities:

If the same city name with the same region name was in 2 countries, it
didn't spot the new country - I realise this is very unlikely.

Be consistent with collator strength when spotting changes in country.
Again, unlikely that the same country occurs with different letter-
case.

Ticker

On Mon, 2021-10-25 at 08:27 +0100, svn commit wrote:
> Version mkgmap-r4810 was committed by gerd on Mon, 25 Oct 2021
> 
> revert changes from r4809 for now, they caused more trouble
> 
> http://www.mkgmap.org.uk/websvn/revision.php?repname=mkgmap&rev=4810
> _______________________________________________
> mkgmap-svn mailing list
> To unsubscribe send an mail to mkgmap-svn-le...@lists.mkgmap.org.uk
> https://www.mkgmap.org.uk/mailman/listinfo/mkgmap-svn

Index: src/uk/me/parabola/imgfmt/app/mdr/Mdr23.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/mdr/Mdr23.java	(revision 4810)
+++ src/uk/me/parabola/imgfmt/app/mdr/Mdr23.java	(working copy)
@@ -12,6 +12,7 @@
  */
 package uk.me.parabola.imgfmt.app.mdr;
 
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -37,6 +38,8 @@
 	 */
 	public void sortRegions(List<Mdr13Record> list) {
 		Sort sort = getConfig().getSort();
+		Collator collator = sort.getCollator();
+		collator.setStrength(Collator.SECONDARY);
 		List<SortKey<Mdr13Record>> keys = MdrUtils.sortList(sort, list);
 
 		String lastName = null;
@@ -47,7 +50,7 @@
 
 			// Only add if different name or map
 			String name = reg.getName();
-			if (reg.getMapIndex() != lastMapIndex || !name.equals(lastName)) {
+			if (lastName == null || reg.getMapIndex() != lastMapIndex || collator.compare(name, lastName) != 0) {
 				record++;
 				reg.getMdr28().setMdr23(record);
 				regions.add(reg);
Index: src/uk/me/parabola/imgfmt/app/mdr/Mdr24.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/mdr/Mdr24.java	(revision 4810)
+++ src/uk/me/parabola/imgfmt/app/mdr/Mdr24.java	(working copy)
@@ -12,6 +12,7 @@
  */
 package uk.me.parabola.imgfmt.app.mdr;
 
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -37,6 +38,8 @@
 	 */
 	public void sortCountries(List<Mdr14Record> list) {
 		Sort sort = getConfig().getSort();
+		Collator collator = sort.getCollator();
+		collator.setStrength(Collator.SECONDARY);
 		List<SortKey<Mdr14Record>> keys = MdrUtils.sortList(sort, list);
 
 		String lastName = null;
@@ -48,7 +51,7 @@
 			// If this is a new name, then we prepare a mdr29 record for it.
 			String name = c.getName();
 
-			if (lastMapIndex != c.getMapIndex() || !name.equals(lastName)) {
+			if (lastName == null || lastMapIndex != c.getMapIndex() || collator.compare(name, lastName) != 0) {
 				record++;
 				c.getMdr29().setMdr24(record);
 				countries.add(c);
Index: src/uk/me/parabola/imgfmt/app/mdr/Mdr25.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/mdr/Mdr25.java	(revision 4810)
+++ src/uk/me/parabola/imgfmt/app/mdr/Mdr25.java	(working copy)
@@ -12,6 +12,7 @@
  */
 package uk.me.parabola.imgfmt.app.mdr;
 
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -37,6 +38,9 @@
 	 */
 	public void sortCities(List<Mdr5Record> list) {
 		Sort sort = getConfig().getSort();
+		sort.setKeyStrength(Collator.SECONDARY);
+		Collator collator = sort.getCollator();
+		collator.setStrength(Collator.SECONDARY);
 
 		List<SortKey<Mdr5Record>> keys = new ArrayList<>();
 		for (Mdr5Record c : list) {
@@ -53,8 +57,9 @@
 			Mdr5Record city = key.getObject();
 
 			if (lastCity == null ||
-					(!city.getName().equals(lastCity.getName()) || !(city.getRegionName().equals(lastCity.getRegionName()))))
-			{
+				collator.compare(city.getName(), lastCity.getName()) != 0 ||
+				collator.compare(city.getRegionName(), lastCity.getRegionName()) != 0 ||
+				collator.compare(city.getCountryName(), lastCity.getCountryName()) != 0) {
 				record++;
 
 				// Record in the 29 index if there is one for this record
@@ -61,8 +66,8 @@
 				Mdr14Record mdrCountry = city.getMdrCountry();
 				Mdr29Record mdr29 = mdrCountry.getMdr29();
 				String name = mdr29.getName();
-				assert mdrCountry.getName().equals(name);
-				if (!name.equals(lastName)) {
+				assert collator.compare(mdrCountry.getName(), name) == 0;
+				if (lastName == null || collator.compare(name, lastName) != 0) {
 					mdr29.setMdr25(record);
 					lastName = name;
 				}
Index: src/uk/me/parabola/imgfmt/app/mdr/Mdr28.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/mdr/Mdr28.java	(revision 4810)
+++ src/uk/me/parabola/imgfmt/app/mdr/Mdr28.java	(working copy)
@@ -12,6 +12,7 @@
  */
 package uk.me.parabola.imgfmt.app.mdr;
 
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -35,6 +36,8 @@
 
 	public void buildFromRegions(List<Mdr13Record> regions) {
 		Sort sort = getConfig().getSort();
+		Collator collator = sort.getCollator();
+		collator.setStrength(Collator.SECONDARY);
 		List<SortKey<Mdr13Record>> keys = MdrUtils.sortList(sort, regions);
 
 		int record = 0;
@@ -44,7 +47,7 @@
 			Mdr13Record region = key.getObject();
 
 			String name = region.getName();
-			if (!name.equals(lastName)) {
+			if (lastName == null || collator.compare(name, lastName) != 0) {
 				record++;
 				mdr28 = new Mdr28Record();
 				mdr28.setIndex(record);
Index: src/uk/me/parabola/imgfmt/app/mdr/Mdr29.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/mdr/Mdr29.java	(revision 4810)
+++ src/uk/me/parabola/imgfmt/app/mdr/Mdr29.java	(working copy)
@@ -12,6 +12,7 @@
  */
 package uk.me.parabola.imgfmt.app.mdr;
 
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -36,6 +37,8 @@
 
 	public void buildFromCountries(List<Mdr14Record> countries) {
 		Sort sort = getConfig().getSort();
+		Collator collator = sort.getCollator();
+		collator.setStrength(Collator.SECONDARY);
 		List<SortKey<Mdr14Record>> keys = MdrUtils.sortList(sort, countries);
 
 		// Sorted by name, for every new name we allocate a new 29 record and set the same one in every
@@ -46,7 +49,7 @@
 			Mdr14Record country = key.getObject();
 
 			String name = country.getName();
-			if (!name.equals(lastName)) {
+			if (lastName == null || collator.compare(name, lastName) != 0) {
 				mdr29 = new Mdr29Record();
 				mdr29.setName(name);
 				mdr29.setStrOffset(country.getStrOff());
@@ -83,6 +86,8 @@
 		PointerSizes sizes = getSizes();
 		int size24 = sizes.getSize(24);
 		int size22 = sizes.getSize(22);
+		assert sizes.getNumberOfItems(5) >= sizes.getNumberOfItems(25)
+			: "5:" + sizes.getNumberOfItems(5) + " 25:" + sizes.getNumberOfItems(25);
 		int size25 = sizes.getSize(5);  // NB appears to be size of 5 (cities), not 25 (cities with country).
 		int size26 = has26? sizes.getSize(26): 0;
 		int size17 = Utils.numberToPointerSize(max17);
Index: src/uk/me/parabola/imgfmt/app/mdr/Mdr5.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/mdr/Mdr5.java	(revision 4810)
+++ src/uk/me/parabola/imgfmt/app/mdr/Mdr5.java	(working copy)
@@ -39,6 +39,8 @@
 	private int maxCityIndex;
 	private int localCitySize;
 	private int mdr20PointerSize = 0; // bytes for mdr20 pointer, or 0 if no mdr20
+	private Sort sort;
+	private Collator collator;
 
 	// We need a common area to save the mdr20 values, since there can be multiple
 	// city records with the same global city index
@@ -46,6 +48,10 @@
 
 	public Mdr5(MdrConfig config) {
 		setConfig(config);
+		sort = config.getSort();
+		sort.setKeyStrength(Collator.SECONDARY);
+		collator = sort.getCollator();
+		collator.setStrength(Collator.SECONDARY);
 	}
 
 	public void addCity(Mdr5Record record) {
@@ -62,15 +68,14 @@
 	public void preWriteImpl() {
 		allCities.trimToSize();
 		localCitySize = Utils.numberToPointerSize(maxCityIndex + 1);
-		Sort sort = getConfig().getSort();
-		genCitiesAndMdr20s(sort);
+		genCitiesAndMdr20s();
 		// calculate positions for the different road indexes
-		calcMdr20SortPos(sort);
-		calcMdr21SortPos(sort);
-		calcMdr22SortPos(sort);
+		calcMdr20SortPos();
+		calcMdr21SortPos();
+		calcMdr22SortPos();
 	}
 
-	private void genCitiesAndMdr20s(Sort sort) {
+	private void genCitiesAndMdr20s() {
 		List<SortKey<Mdr5Record>> sortKeys = new ArrayList<>(allCities.size());
 		Map<String, byte[]> regionCache = new HashMap<>();
 		Map<String, byte[]> countryCache = new HashMap<>();
@@ -90,7 +95,6 @@
 		sortKeys.sort(null);
 
 		cities = new ArrayList<>(sortKeys.size());
-		Collator collator = sort.getCollator();
 
 		int count = 0;
 		Mdr5Record lastCity = null;
@@ -122,7 +126,7 @@
 	/**
 	 * Calculate a position when sorting by name, region, and country- This is used for MDR20. 
 	 */
-	private void calcMdr20SortPos(Sort sort) {
+	private void calcMdr20SortPos() {
 		List<SortKey<Mdr5Record>> sortKeys = new ArrayList<>(allCities.size());
 		Map<String, byte[]> regionCache = new HashMap<>();
 		Map<String, byte[]> countryCache = new HashMap<>();
@@ -155,7 +159,7 @@
 	/**
 	 * Calculate a position when sorting by region- This is used for MDR21. 
 	 */
-	private void calcMdr21SortPos(Sort sort) {
+	private void calcMdr21SortPos() {
 		List<SortKey<Mdr5Record>> sortKeys = new ArrayList<>(allCities.size());
 		for (Mdr5Record m : allCities) {
 			if (m.getRegionName() == null) 
@@ -181,7 +185,7 @@
 	 * Calculate a position when sorting by country- This is used for MDR22. 
 	 */
 
-	private void calcMdr22SortPos(Sort sort) {
+	private void calcMdr22SortPos() {
 		List<SortKey<Mdr5Record>> sortKeys = new ArrayList<>(allCities.size());
 		for (Mdr5Record m : allCities) {
 			if (m.getCountryName() == null)
@@ -208,7 +212,6 @@
 		Mdr5Record lastCity = null;
 		boolean hasString = hasFlag(0x8);
 		boolean hasRegion = hasFlag(0x4);
-		Collator collator = getConfig().getSort().getCollator();
 		for (Mdr5Record city : cities) {
 			int gci = city.getGlobalCityIndex();
 			addIndexPointer(city.getMapIndex(), gci);
@@ -284,7 +287,7 @@
 		int val = (localCitySize - 1);
 		// String offset is only included for a mapsource index.
 		if (isForDevice() ) {
-			if (!getConfig().getSort().isMulti())
+			if (!sort.isMulti())
 				val |= 0x40; // mdr17 sub section present (not with unicode)
 		} else {
 			val |= 0x04;  // region
Index: src/uk/me/parabola/imgfmt/app/srt/Sort.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/srt/Sort.java	(revision 4810)
+++ src/uk/me/parabola/imgfmt/app/srt/Sort.java	(working copy)
@@ -72,6 +72,9 @@
 	private int headerLen = SRTHeader.HEADER_LEN; 
 	private int header3Len = -1;
 
+	private int maxPrimary = 0;  // max seen while loading resource/sort/cp*.txt file. == 10690 for cp65001.txt on 18Oct2021
+	private int keyStrength = Collator.TERTIARY;
+
 	public Sort() {
 		pages[0] = new Page();
 	}
@@ -80,9 +83,11 @@
 		ensurePage(ch >>> 8);
 		if (getPrimary(ch) != 0)
 			throw new ExitException(String.format("Repeated primary index 0x%x", ch & 0xff));
-		setPrimary (ch, primary);
+		if (primary > maxPrimary)
+			maxPrimary = primary;
+		setPrimary(ch, primary);
 		setSecondary(ch, secondary);
-		setTertiary( ch, tertiary);
+		setTertiary(ch, tertiary);
 
 		setFlags(ch, flags);
 		int numExp = (flags >> 4) & 0xf;
@@ -389,10 +394,21 @@
 	 */
 	private int fillCompleteKey(char[] bVal, byte[] key) {
 		int start = fillKey(Collator.PRIMARY, bVal, key, 0);
-		start = fillKey(Collator.SECONDARY, bVal, key, start);
-		return fillKey(Collator.TERTIARY, bVal, key, start);
+		if (keyStrength != Collator.PRIMARY) {
+			start = fillKey(Collator.SECONDARY, bVal, key, start);
+			if (keyStrength != Collator.SECONDARY)
+				start = fillKey(Collator.TERTIARY, bVal, key, start);
+		}
+		return start;
 	}
 
+	private static int writeSort(int strength, int pos, byte[] outKey, int start) {
+		if (strength == Collator.PRIMARY)
+			outKey[start++] = (byte) ((pos >> 8) & 0xff); // for 2 byte charsets
+		outKey[start++] = (byte) (pos & 0xff);
+		return start;
+	}
+
 	/**
 	 * Fill in the output key for a given strength.
 	 *
@@ -405,22 +421,25 @@
 		int index = start;
 		for (char c : input) {
 
-			if (!hasPage(c >>> 8))
+			if (!hasPage(c >>> 8)) {
+				if (isMulti() && type == Collator.PRIMARY) // attempt to avoid conflict with defined sorts. Be consistent with SrtCollator
+					index = writeSort(type, c + maxPrimary, outKey, index);
 				continue;
+			}
 
 			int exp = (getFlags(c) >> 4) & 0xf;
 			if (exp == 0) {
+				int startIndex = index;
 				index = writePos(type, c, outKey, index);
+				if (index == startIndex && isMulti() && type == Collator.PRIMARY) { // nothing got written but is needed
+					index = writeSort(type, c + maxPrimary, outKey, index);
+				}
 			} else {
 				// now have to redirect to a list of input chars, get the list via the primary value always.
 				int idx = getPrimary(c);
 				for (int i = idx - 1; i < idx + exp; i++) {
 					int pos = expansions.get(i).getPosition(type);
-					if (pos != 0) {
-						if (type == Collator.PRIMARY)
-							outKey[index++] = (byte) ((pos >>> 8) & 0xff);
-						outKey[index++] = (byte) pos;
-					}
+					index = writeSort(type, pos, outKey, index);
 				}
 			}
 		}
@@ -572,6 +591,19 @@
 		return multi;
 	}
 
+	/**
+	 * This determines the strength level in keys generated by CreateSortKey().
+	 * If lower strength Collators are going to be used to scan the result of the
+	 * sort then there is no point in adding the extra levels (more bytes) to each key.
+	 * More importantly, it allows sort/unique logic to notice changes in just the key,
+	 * rather than having to get the components and use a lower strength Collator to compare.
+	 *
+	 * @param strength The required key strength
+	 */
+	public void setKeyStrength(int strength) {
+		keyStrength = strength;
+	}
+
 	public int getPos(int type, int ch) {
 		return pages[ch >>> 8].getPos(type, ch);
 	}
@@ -681,11 +713,8 @@
 		 */
 		public int writePos(int strength, int ch, byte[] outKey, int start) {
 			int pos = getPos(strength, ch);
-			if (pos != 0) {
-				if (strength == Collator.PRIMARY)
-					outKey[start++] = (byte) ((pos >> 8) & 0xff); // for 2 byte charsets
-				outKey[start++] = (byte) (pos & 0xff);
-			}
+			if (pos != 0)
+				start = writeSort(strength, pos, outKey, start);
 			return start;
 		}
 	}
@@ -851,8 +880,10 @@
 						}
 
 						// Get the first non-ignorable at this level
-						int c = chars[pos++ & 0xff];
+						int c = chars[pos++];
 						if (!hasPage(c >>> 8)) {
+							if (isMulti() && type == Collator.PRIMARY) // order by char itself if no sortpos
+								return (c + maxPrimary) & 0xffff; // attempt to avoid conflict with defined sorts. Be consistent with fillKey
 							next = 0;
 							continue;
 						}
@@ -869,6 +900,8 @@
 								expPos = 0;
 						} else {
 							next = getPos(type, c);
+							if (next == 0 && isMulti() && type == Collator.PRIMARY) // like earlier
+								return (c + maxPrimary) & 0xffff;
 						}
 
 					} while (next == 0);
_______________________________________________
mkgmap-dev mailing list
mkgmap-dev@lists.mkgmap.org.uk
https://www.mkgmap.org.uk/mailman/listinfo/mkgmap-dev

Reply via email to