Michael Paquier wrote on 23.06.2022 06:39:
That'd leave just DEGREE CELSIUS and DEGREE FAHRENHEIT.  Not sure how
to kill those last two special cases -- they should be directly
replaced by their decomposition.

[1] https://unicode-org.atlassian.net/browse/CLDR-11383
In patch v3, support for Cyrillic is added.
Special character function has been purged.
Added support for the general category So (Other Symbol). This category
includes the characters previously handled by special_cases().
I think that we'd better split v3 into more patches to keep each
improvement isolated.  The addition of cyrillic characters in the
range of letters and the removal of the sound copyright from the
special cases can be done on their own, before considering the
original case tackled by this thread.
--
Michael
The only division that is probably possible is the one attached.

--
Przemysław Sztoch | Mobile +48 509 99 00 66
commit 84bb8cdfeda1d4bfef813870d74a6db6d595dbc0
Author: Przemyslaw Sztoch <pszt...@finn.pl>
Date:   Thu Jun 23 13:56:09 2022 +0200

    Update unaccent rules generator
    
    add Cyrillic alphabet
    add digits
    set->dict

diff --git a/contrib/unaccent/generate_unaccent_rules.py 
b/contrib/unaccent/generate_unaccent_rules.py
index c405e231b3..71932c8224 100644
--- a/contrib/unaccent/generate_unaccent_rules.py
+++ b/contrib/unaccent/generate_unaccent_rules.py
@@ -35,13 +35,16 @@ import xml.etree.ElementTree as ET
 sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
 
 # The ranges of Unicode characters that we consider to be "plain letters".
-# For now we are being conservative by including only Latin and Greek.  This
-# could be extended in future based on feedback from people with relevant
-# language knowledge.
+# For now we are being conservative by including only Latin, Greek and 
Cyrillic.
+# This could be extended in future based on feedback from people with
+# relevant language knowledge.
 PLAIN_LETTER_RANGES = ((ord('a'), ord('z')),  # Latin lower case
                        (ord('A'), ord('Z')),  # Latin upper case
-                       (0x03b1, 0x03c9),      # GREEK SMALL LETTER ALPHA, 
GREEK SMALL LETTER OMEGA
-                       (0x0391, 0x03a9))      # GREEK CAPITAL LETTER ALPHA, 
GREEK CAPITAL LETTER OMEGA
+                       (ord('0'), ord('9')),  # Digits
+                       (0x0391, 0x03a9),      # Greek capital letters 
(ALPHA-OMEGA)
+                       (0x03b1, 0x03c9),      # Greek small letters 
(ALPHA-OMEGA)
+                       (0x0410, 0x044f),      # Cyrillic capital and small 
letters
+                       (0x00b0, 0x00b0))      # Degree sign
 
 # Combining marks follow a "base" character, and result in a composite
 # character. Example: "U&'A\0300'"produces "À".There are three types of
@@ -139,24 +142,24 @@ def get_plain_letter(codepoint, table):
         return codepoint
 
     # Should not come here
-    assert(False)
+    assert False, 'Codepoint U+%0.2X' % codepoint.id
 
 
 def is_ligature(codepoint, table):
     """Return true for letters combined with letters."""
-    return all(is_letter(table[i], table) for i in codepoint.combining_ids)
+    return all(i in table and is_letter(table[i], table) for i in 
codepoint.combining_ids)
 
 
 def get_plain_letters(codepoint, table):
     """Return a list of plain letters from a ligature."""
-    assert(is_ligature(codepoint, table))
+    assert is_ligature(codepoint, table), 'Codepoint U+%0.2X' % codepoint.id
     return [get_plain_letter(table[id], table) for id in 
codepoint.combining_ids]
 
 
 def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
     """Parse the XML file and return a set of tuples (src, trg), where "src"
     is the original character and "trg" the substitute."""
-    charactersSet = set()
+    charactersDict = {}
 
     # RegEx to parse rules
     rulePattern = re.compile(r'^(?:(.)|(\\u[0-9a-fA-F]{4})) \u2192 
(?:\'(.+)\'|(.+)) ;')
@@ -196,25 +199,19 @@ def 
parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
             # the parser of unaccent only accepts non-whitespace characters
             # for "src" and "trg" (see unaccent.c)
             if not src.isspace() and not trg.isspace():
-                charactersSet.add((ord(src), trg))
+                charactersDict[ord(src)] = trg
 
-    return charactersSet
+    return charactersDict
 
 
 def special_cases():
     """Returns the special cases which are not handled by other methods"""
-    charactersSet = set()
+    charactersDict = {}
 
-    # Cyrillic
-    charactersSet.add((0x0401, "\u0415"))  # CYRILLIC CAPITAL LETTER IO
-    charactersSet.add((0x0451, "\u0435"))  # CYRILLIC SMALL LETTER IO
+    charactersDict[0x2103] = "\xb0C"   # DEGREE CELSIUS
+    charactersDict[0x2109] = "\xb0F"   # DEGREE FAHRENHEIT
 
-    # Symbols of "Letterlike Symbols" Unicode Block (U+2100 to U+214F)
-    charactersSet.add((0x2103, "\xb0C"))   # DEGREE CELSIUS
-    charactersSet.add((0x2109, "\xb0F"))   # DEGREE FAHRENHEIT
-    charactersSet.add((0x2117, "(P)"))     # SOUND RECORDING COPYRIGHT
-
-    return charactersSet
+    return charactersDict
 
 
 def main(args):
@@ -224,8 +221,8 @@ def main(args):
     table = {}
     all = []
 
-    # unordered set for ensure uniqueness
-    charactersSet = set()
+    # dictionary for ensure uniqueness
+    charactersDict = {}
 
     # read file UnicodeData.txt
     with codecs.open(
@@ -248,29 +245,26 @@ def main(args):
     # walk through all the codepoints looking for interesting mappings
     for codepoint in all:
         if codepoint.general_category.startswith('L') and \
-           len(codepoint.combining_ids) > 1:
+           len(codepoint.combining_ids) > 0:
             if is_letter_with_marks(codepoint, table):
-                charactersSet.add((codepoint.id,
-                                   chr(get_plain_letter(codepoint, table).id)))
+                charactersDict[codepoint.id] = chr(get_plain_letter(codepoint, 
table).id)
             elif args.noLigaturesExpansion is False and is_ligature(codepoint, 
table):
-                charactersSet.add((codepoint.id,
-                                   "".join(chr(combining_codepoint.id)
+                charactersDict[codepoint.id] = 
"".join(chr(combining_codepoint.id)
                                            for combining_codepoint
-                                           in get_plain_letters(codepoint, 
table))))
+                                           in get_plain_letters(codepoint, 
table))
         elif is_mark_to_remove(codepoint):
-            charactersSet.add((codepoint.id, None))
+            charactersDict[codepoint.id] = None
 
     # add CLDR Latin-ASCII characters
     if not args.noLigaturesExpansion:
-        charactersSet |= 
parse_cldr_latin_ascii_transliterator(args.latinAsciiFilePath)
-        charactersSet |= special_cases()
+        charactersDict |= 
parse_cldr_latin_ascii_transliterator(args.latinAsciiFilePath)
+        charactersDict |= special_cases()
 
     # sort for more convenient display
-    charactersList = sorted(charactersSet, key=lambda characterPair: 
characterPair[0])
-
-    for characterPair in charactersList:
-        print_record(characterPair[0], characterPair[1])
+    charactersList = sorted(charactersDict.keys(), key=lambda charId: charId)
 
+    for charId in charactersList:
+        print_record(charId, charactersDict[charId])
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='This script builds 
unaccent.rules on standard output when given the contents of UnicodeData.txt 
and Latin-ASCII.xml given as arguments.')
diff --git a/contrib/unaccent/unaccent.rules b/contrib/unaccent/unaccent.rules
index 3030166ed6..3a5d35627a 100644
--- a/contrib/unaccent/unaccent.rules
+++ b/contrib/unaccent/unaccent.rules
@@ -1,9 +1,12 @@
 ¡      !
 ©      (C)
+ª      a
 «      <<
 ­      -
 ®      (R)
 ±      +/-
+µ      μ
+º      o
 »      >>
 ¼       1/4
 ½       1/2
@@ -402,6 +405,11 @@
 ʦ      ts
 ʪ      ls
 ʫ      lz
+ʰ      h
+ʲ      j
+ʳ      r
+ʷ      w
+ʸ      y
 ʹ      '
 ʺ      "
 ʻ      '
@@ -417,6 +425,9 @@
 ˖      +
 ˗      -
 ˜      ~
+ˡ      l
+ˢ      s
+ˣ      x
 ̀
 ́
 ̂
@@ -536,8 +547,61 @@
 ό      ο
 ύ      υ
 ώ      ω
+ϐ      β
+ϑ      θ
+ϒ      Υ
+ϕ      φ
+ϖ      π
+ϰ      κ
+ϱ      ρ
+ϲ      ς
+ϴ      Θ
+ϵ      ε
+Ϲ      Σ
+Ѐ      Е
 Ё      Е
+Ѓ      Г
+Ќ      К
+Ѝ      И
+Ў      У
+Й      И
+й      и
+ѐ      е
 ё      е
+ѓ      г
+ќ      к
+ѝ      и
+ў      у
+Ӂ      Ж
+ӂ      ж
+Ӑ      А
+ӑ      а
+Ӓ      А
+ӓ      а
+Ӗ      Е
+ӗ      е
+Ӝ      Ж
+ӝ      ж
+Ӟ      З
+ӟ      з
+Ӣ      И
+ӣ      и
+Ӥ      И
+ӥ      и
+Ӧ      О
+ӧ      о
+Ӭ      Э
+ӭ      э
+Ӯ      У
+ӯ      у
+Ӱ      У
+ӱ      у
+Ӳ      У
+ӳ      у
+Ӵ      Ч
+ӵ      ч
+Ӹ      Ы
+ӹ      ы
 ᴀ      A
 ᴁ      AE
 ᴃ      B
@@ -556,6 +620,50 @@
 ᴠ      V
 ᴡ      W
 ᴢ      Z
+ᴬ      A
+ᴮ      B
+ᴰ      D
+ᴱ      E
+ᴳ      G
+ᴴ      H
+ᴵ      I
+ᴶ      J
+ᴷ      K
+ᴸ      L
+ᴹ      M
+ᴺ      N
+ᴼ      O
+ᴾ      P
+ᴿ      R
+ᵀ      T
+ᵁ      U
+ᵂ      W
+ᵃ      a
+ᵇ      b
+ᵈ      d
+ᵉ      e
+ᵍ      g
+ᵏ      k
+ᵐ      m
+ᵒ      o
+ᵖ      p
+ᵗ      t
+ᵘ      u
+ᵛ      v
+ᵝ      β
+ᵞ      γ
+ᵟ      δ
+ᵠ      φ
+ᵡ      χ
+ᵢ      i
+ᵣ      r
+ᵤ      u
+ᵥ      v
+ᵦ      β
+ᵧ      γ
+ᵨ      ρ
+ᵩ      φ
+ᵪ      χ
 ᵫ      ue
 ᵬ      b
 ᵭ      d
@@ -568,6 +676,7 @@
 ᵴ      s
 ᵵ      t
 ᵶ      z
+ᵸ      н
 ᵺ      th
 ᵻ      I
 ᵽ      p
@@ -592,6 +701,10 @@
 ᶓ      e
 ᶖ      i
 ᶙ      u
+ᶜ      c
+ᶠ      f
+ᶻ      z
+ᶿ      θ
 Ḁ      A
 ḁ      a
 Ḃ      B
@@ -947,12 +1060,19 @@
 Ὦ      Ω
 Ὧ      Ω
 ὰ      α
+ά      α
 ὲ      ε
+έ      ε
 ὴ      η
+ή      η
 ὶ      ι
+ί      ι
 ὸ      ο
+ό      ο
 ὺ      υ
+ύ      υ
 ὼ      ω
+ώ      ω
 ᾀ      α
 ᾁ      α
 ᾂ      α
@@ -1011,26 +1131,33 @@
 Ᾰ      Α
 Ᾱ      Α
 Ὰ      Α
+Ά      Α
 ᾼ      Α
+ι      ι
 ῂ      η
 ῃ      η
 ῄ      η
 ῆ      η
 ῇ      η
 Ὲ      Ε
+Έ      Ε
 Ὴ      Η
+Ή      Η
 ῌ      Η
 ῐ      ι
 ῑ      ι
 ῒ      ι
+ΐ      ι
 ῖ      ι
 ῗ      ι
 Ῐ      Ι
 Ῑ      Ι
 Ὶ      Ι
+Ί      Ι
 ῠ      υ
 ῡ      υ
 ῢ      υ
+ΰ      υ
 ῤ      ρ
 ῥ      ρ
 ῦ      υ
@@ -1038,6 +1165,7 @@
 Ῠ      Υ
 Ῡ      Υ
 Ὺ      Υ
+Ύ      Υ
 Ῥ      Ρ
 ῲ      ω
 ῳ      ω
@@ -1045,7 +1173,9 @@
 ῶ      ω
 ῷ      ω
 Ὸ      Ο
+Ό      Ο
 Ὼ      Ω
+Ώ      Ω
 ῼ      Ω
 ‐      -
 ‑      -
@@ -1077,6 +1207,20 @@
 ⁈      ?!
 ⁉      !?
 ⁎      *
+ⁱ      i
+ⁿ      n
+ₐ      a
+ₑ      e
+ₒ      o
+ₓ      x
+ₕ      h
+ₖ      k
+ₗ      l
+ₘ      m
+ₙ      n
+ₚ      p
+ₛ      s
+ₜ      t
 ₠      CE
 ₢      Cr
 ₣      Fr.
@@ -1119,7 +1263,10 @@
 ℞      Rx
 ℡      TEL
 ℤ      Z
+Ω      Ω
 ℨ      Z
+K      K
+Å      A
 ℬ      B
 ℭ      C
 ℯ      e
@@ -1129,6 +1276,10 @@
 ℴ      o
 ℹ      i
 ℻      FAX
+ℼ      π
+ℽ      γ
+ℾ      Γ
+ℿ      Π
 ⅅ      D
 ⅆ      d
 ⅇ      e
@@ -1281,6 +1432,8 @@
 ⱴ      v
 ⱸ      e
 ⱺ      o
+ⱼ      j
+ⱽ      V
 Ȿ      S
 Ɀ      Z
 、      ,
@@ -1379,6 +1532,8 @@
 ㏝      Wb
 ㏞      V/m
 ㏟      A/m
+ꚜ      ъ
+ꚝ      ь
 ꜰ      F
 ꜱ      S
 Ꜳ      AA
@@ -1455,6 +1610,9 @@
 Ꞩ      S
 ꞩ      s
 Ɦ      H
+ꟲ      C
+ꟳ      F
+ꟴ      Q
 ff      ff
 fi      fi
 fl      fl
@@ -1611,6 +1769,904 @@
 、      ,
 ←      <-
 →      ->
+𐞥      q
+𝐀      A
+𝐁      B
+𝐂      C
+𝐃      D
+𝐄      E
+𝐅      F
+𝐆      G
+𝐇      H
+𝐈      I
+𝐉      J
+𝐊      K
+𝐋      L
+𝐌      M
+𝐍      N
+𝐎      O
+𝐏      P
+𝐐      Q
+𝐑      R
+𝐒      S
+𝐓      T
+𝐔      U
+𝐕      V
+𝐖      W
+𝐗      X
+𝐘      Y
+𝐙      Z
+𝐚      a
+𝐛      b
+𝐜      c
+𝐝      d
+𝐞      e
+𝐟      f
+𝐠      g
+𝐡      h
+𝐢      i
+𝐣      j
+𝐤      k
+𝐥      l
+𝐦      m
+𝐧      n
+𝐨      o
+𝐩      p
+𝐪      q
+𝐫      r
+𝐬      s
+𝐭      t
+𝐮      u
+𝐯      v
+𝐰      w
+𝐱      x
+𝐲      y
+𝐳      z
+𝐴      A
+𝐵      B
+𝐶      C
+𝐷      D
+𝐸      E
+𝐹      F
+𝐺      G
+𝐻      H
+𝐼      I
+𝐽      J
+𝐾      K
+𝐿      L
+𝑀      M
+𝑁      N
+𝑂      O
+𝑃      P
+𝑄      Q
+𝑅      R
+𝑆      S
+𝑇      T
+𝑈      U
+𝑉      V
+𝑊      W
+𝑋      X
+𝑌      Y
+𝑍      Z
+𝑎      a
+𝑏      b
+𝑐      c
+𝑑      d
+𝑒      e
+𝑓      f
+𝑔      g
+𝑖      i
+𝑗      j
+𝑘      k
+𝑙      l
+𝑚      m
+𝑛      n
+𝑜      o
+𝑝      p
+𝑞      q
+𝑟      r
+𝑠      s
+𝑡      t
+𝑢      u
+𝑣      v
+𝑤      w
+𝑥      x
+𝑦      y
+𝑧      z
+𝑨      A
+𝑩      B
+𝑪      C
+𝑫      D
+𝑬      E
+𝑭      F
+𝑮      G
+𝑯      H
+𝑰      I
+𝑱      J
+𝑲      K
+𝑳      L
+𝑴      M
+𝑵      N
+𝑶      O
+𝑷      P
+𝑸      Q
+𝑹      R
+𝑺      S
+𝑻      T
+𝑼      U
+𝑽      V
+𝑾      W
+𝑿      X
+𝒀      Y
+𝒁      Z
+𝒂      a
+𝒃      b
+𝒄      c
+𝒅      d
+𝒆      e
+𝒇      f
+𝒈      g
+𝒉      h
+𝒊      i
+𝒋      j
+𝒌      k
+𝒍      l
+𝒎      m
+𝒏      n
+𝒐      o
+𝒑      p
+𝒒      q
+𝒓      r
+𝒔      s
+𝒕      t
+𝒖      u
+𝒗      v
+𝒘      w
+𝒙      x
+𝒚      y
+𝒛      z
+𝒜      A
+𝒞      C
+𝒟      D
+𝒢      G
+𝒥      J
+𝒦      K
+𝒩      N
+𝒪      O
+𝒫      P
+𝒬      Q
+𝒮      S
+𝒯      T
+𝒰      U
+𝒱      V
+𝒲      W
+𝒳      X
+𝒴      Y
+𝒵      Z
+𝒶      a
+𝒷      b
+𝒸      c
+𝒹      d
+𝒻      f
+𝒽      h
+𝒾      i
+𝒿      j
+𝓀      k
+𝓁      l
+𝓂      m
+𝓃      n
+𝓅      p
+𝓆      q
+𝓇      r
+𝓈      s
+𝓉      t
+𝓊      u
+𝓋      v
+𝓌      w
+𝓍      x
+𝓎      y
+𝓏      z
+𝓐      A
+𝓑      B
+𝓒      C
+𝓓      D
+𝓔      E
+𝓕      F
+𝓖      G
+𝓗      H
+𝓘      I
+𝓙      J
+𝓚      K
+𝓛      L
+𝓜      M
+𝓝      N
+𝓞      O
+𝓟      P
+𝓠      Q
+𝓡      R
+𝓢      S
+𝓣      T
+𝓤      U
+𝓥      V
+𝓦      W
+𝓧      X
+𝓨      Y
+𝓩      Z
+𝓪      a
+𝓫      b
+𝓬      c
+𝓭      d
+𝓮      e
+𝓯      f
+𝓰      g
+𝓱      h
+𝓲      i
+𝓳      j
+𝓴      k
+𝓵      l
+𝓶      m
+𝓷      n
+𝓸      o
+𝓹      p
+𝓺      q
+𝓻      r
+𝓼      s
+𝓽      t
+𝓾      u
+𝓿      v
+𝔀      w
+𝔁      x
+𝔂      y
+𝔃      z
+𝔄      A
+𝔅      B
+𝔇      D
+𝔈      E
+𝔉      F
+𝔊      G
+𝔍      J
+𝔎      K
+𝔏      L
+𝔐      M
+𝔑      N
+𝔒      O
+𝔓      P
+𝔔      Q
+𝔖      S
+𝔗      T
+𝔘      U
+𝔙      V
+𝔚      W
+𝔛      X
+𝔜      Y
+𝔞      a
+𝔟      b
+𝔠      c
+𝔡      d
+𝔢      e
+𝔣      f
+𝔤      g
+𝔥      h
+𝔦      i
+𝔧      j
+𝔨      k
+𝔩      l
+𝔪      m
+𝔫      n
+𝔬      o
+𝔭      p
+𝔮      q
+𝔯      r
+𝔰      s
+𝔱      t
+𝔲      u
+𝔳      v
+𝔴      w
+𝔵      x
+𝔶      y
+𝔷      z
+𝔸      A
+𝔹      B
+𝔻      D
+𝔼      E
+𝔽      F
+𝔾      G
+𝕀      I
+𝕁      J
+𝕂      K
+𝕃      L
+𝕄      M
+𝕆      O
+𝕊      S
+𝕋      T
+𝕌      U
+𝕍      V
+𝕎      W
+𝕏      X
+𝕐      Y
+𝕒      a
+𝕓      b
+𝕔      c
+𝕕      d
+𝕖      e
+𝕗      f
+𝕘      g
+𝕙      h
+𝕚      i
+𝕛      j
+𝕜      k
+𝕝      l
+𝕞      m
+𝕟      n
+𝕠      o
+𝕡      p
+𝕢      q
+𝕣      r
+𝕤      s
+𝕥      t
+𝕦      u
+𝕧      v
+𝕨      w
+𝕩      x
+𝕪      y
+𝕫      z
+𝕬      A
+𝕭      B
+𝕮      C
+𝕯      D
+𝕰      E
+𝕱      F
+𝕲      G
+𝕳      H
+𝕴      I
+𝕵      J
+𝕶      K
+𝕷      L
+𝕸      M
+𝕹      N
+𝕺      O
+𝕻      P
+𝕼      Q
+𝕽      R
+𝕾      S
+𝕿      T
+𝖀      U
+𝖁      V
+𝖂      W
+𝖃      X
+𝖄      Y
+𝖅      Z
+𝖆      a
+𝖇      b
+𝖈      c
+𝖉      d
+𝖊      e
+𝖋      f
+𝖌      g
+𝖍      h
+𝖎      i
+𝖏      j
+𝖐      k
+𝖑      l
+𝖒      m
+𝖓      n
+𝖔      o
+𝖕      p
+𝖖      q
+𝖗      r
+𝖘      s
+𝖙      t
+𝖚      u
+𝖛      v
+𝖜      w
+𝖝      x
+𝖞      y
+𝖟      z
+𝖠      A
+𝖡      B
+𝖢      C
+𝖣      D
+𝖤      E
+𝖥      F
+𝖦      G
+𝖧      H
+𝖨      I
+𝖩      J
+𝖪      K
+𝖫      L
+𝖬      M
+𝖭      N
+𝖮      O
+𝖯      P
+𝖰      Q
+𝖱      R
+𝖲      S
+𝖳      T
+𝖴      U
+𝖵      V
+𝖶      W
+𝖷      X
+𝖸      Y
+𝖹      Z
+𝖺      a
+𝖻      b
+𝖼      c
+𝖽      d
+𝖾      e
+𝖿      f
+𝗀      g
+𝗁      h
+𝗂      i
+𝗃      j
+𝗄      k
+𝗅      l
+𝗆      m
+𝗇      n
+𝗈      o
+𝗉      p
+𝗊      q
+𝗋      r
+𝗌      s
+𝗍      t
+𝗎      u
+𝗏      v
+𝗐      w
+𝗑      x
+𝗒      y
+𝗓      z
+𝗔      A
+𝗕      B
+𝗖      C
+𝗗      D
+𝗘      E
+𝗙      F
+𝗚      G
+𝗛      H
+𝗜      I
+𝗝      J
+𝗞      K
+𝗟      L
+𝗠      M
+𝗡      N
+𝗢      O
+𝗣      P
+𝗤      Q
+𝗥      R
+𝗦      S
+𝗧      T
+𝗨      U
+𝗩      V
+𝗪      W
+𝗫      X
+𝗬      Y
+𝗭      Z
+𝗮      a
+𝗯      b
+𝗰      c
+𝗱      d
+𝗲      e
+𝗳      f
+𝗴      g
+𝗵      h
+𝗶      i
+𝗷      j
+𝗸      k
+𝗹      l
+𝗺      m
+𝗻      n
+𝗼      o
+𝗽      p
+𝗾      q
+𝗿      r
+𝘀      s
+𝘁      t
+𝘂      u
+𝘃      v
+𝘄      w
+𝘅      x
+𝘆      y
+𝘇      z
+𝘈      A
+𝘉      B
+𝘊      C
+𝘋      D
+𝘌      E
+𝘍      F
+𝘎      G
+𝘏      H
+𝘐      I
+𝘑      J
+𝘒      K
+𝘓      L
+𝘔      M
+𝘕      N
+𝘖      O
+𝘗      P
+𝘘      Q
+𝘙      R
+𝘚      S
+𝘛      T
+𝘜      U
+𝘝      V
+𝘞      W
+𝘟      X
+𝘠      Y
+𝘡      Z
+𝘢      a
+𝘣      b
+𝘤      c
+𝘥      d
+𝘦      e
+𝘧      f
+𝘨      g
+𝘩      h
+𝘪      i
+𝘫      j
+𝘬      k
+𝘭      l
+𝘮      m
+𝘯      n
+𝘰      o
+𝘱      p
+𝘲      q
+𝘳      r
+𝘴      s
+𝘵      t
+𝘶      u
+𝘷      v
+𝘸      w
+𝘹      x
+𝘺      y
+𝘻      z
+𝘼      A
+𝘽      B
+𝘾      C
+𝘿      D
+𝙀      E
+𝙁      F
+𝙂      G
+𝙃      H
+𝙄      I
+𝙅      J
+𝙆      K
+𝙇      L
+𝙈      M
+𝙉      N
+𝙊      O
+𝙋      P
+𝙌      Q
+𝙍      R
+𝙎      S
+𝙏      T
+𝙐      U
+𝙑      V
+𝙒      W
+𝙓      X
+𝙔      Y
+𝙕      Z
+𝙖      a
+𝙗      b
+𝙘      c
+𝙙      d
+𝙚      e
+𝙛      f
+𝙜      g
+𝙝      h
+𝙞      i
+𝙟      j
+𝙠      k
+𝙡      l
+𝙢      m
+𝙣      n
+𝙤      o
+𝙥      p
+𝙦      q
+𝙧      r
+𝙨      s
+𝙩      t
+𝙪      u
+𝙫      v
+𝙬      w
+𝙭      x
+𝙮      y
+𝙯      z
+𝙰      A
+𝙱      B
+𝙲      C
+𝙳      D
+𝙴      E
+𝙵      F
+𝙶      G
+𝙷      H
+𝙸      I
+𝙹      J
+𝙺      K
+𝙻      L
+𝙼      M
+𝙽      N
+𝙾      O
+𝙿      P
+𝚀      Q
+𝚁      R
+𝚂      S
+𝚃      T
+𝚄      U
+𝚅      V
+𝚆      W
+𝚇      X
+𝚈      Y
+𝚉      Z
+𝚊      a
+𝚋      b
+𝚌      c
+𝚍      d
+𝚎      e
+𝚏      f
+𝚐      g
+𝚑      h
+𝚒      i
+𝚓      j
+𝚔      k
+𝚕      l
+𝚖      m
+𝚗      n
+𝚘      o
+𝚙      p
+𝚚      q
+𝚛      r
+𝚜      s
+𝚝      t
+𝚞      u
+𝚟      v
+𝚠      w
+𝚡      x
+𝚢      y
+𝚣      z
+𝚨      Α
+𝚩      Β
+𝚪      Γ
+𝚫      Δ
+𝚬      Ε
+𝚭      Ζ
+𝚮      Η
+𝚯      Θ
+𝚰      Ι
+𝚱      Κ
+𝚲      Λ
+𝚳      Μ
+𝚴      Ν
+𝚵      Ξ
+𝚶      Ο
+𝚷      Π
+𝚸      Ρ
+𝚺      Σ
+𝚻      Τ
+𝚼      Υ
+𝚽      Φ
+𝚾      Χ
+𝚿      Ψ
+𝛀      Ω
+𝛂      α
+𝛃      β
+𝛄      γ
+𝛅      δ
+𝛆      ε
+𝛇      ζ
+𝛈      η
+𝛉      θ
+𝛊      ι
+𝛋      κ
+𝛌      λ
+𝛍      μ
+𝛎      ν
+𝛏      ξ
+𝛐      ο
+𝛑      π
+𝛒      ρ
+𝛓      ς
+𝛔      σ
+𝛕      τ
+𝛖      υ
+𝛗      φ
+𝛘      χ
+𝛙      ψ
+𝛚      ω
+𝛢      Α
+𝛣      Β
+𝛤      Γ
+𝛥      Δ
+𝛦      Ε
+𝛧      Ζ
+𝛨      Η
+𝛩      Θ
+𝛪      Ι
+𝛫      Κ
+𝛬      Λ
+𝛭      Μ
+𝛮      Ν
+𝛯      Ξ
+𝛰      Ο
+𝛱      Π
+𝛲      Ρ
+𝛴      Σ
+𝛵      Τ
+𝛶      Υ
+𝛷      Φ
+𝛸      Χ
+𝛹      Ψ
+𝛺      Ω
+𝛼      α
+𝛽      β
+𝛾      γ
+𝛿      δ
+𝜀      ε
+𝜁      ζ
+𝜂      η
+𝜃      θ
+𝜄      ι
+𝜅      κ
+𝜆      λ
+𝜇      μ
+𝜈      ν
+𝜉      ξ
+𝜊      ο
+𝜋      π
+𝜌      ρ
+𝜍      ς
+𝜎      σ
+𝜏      τ
+𝜐      υ
+𝜑      φ
+𝜒      χ
+𝜓      ψ
+𝜔      ω
+𝜜      Α
+𝜝      Β
+𝜞      Γ
+𝜟      Δ
+𝜠      Ε
+𝜡      Ζ
+𝜢      Η
+𝜣      Θ
+𝜤      Ι
+𝜥      Κ
+𝜦      Λ
+𝜧      Μ
+𝜨      Ν
+𝜩      Ξ
+𝜪      Ο
+𝜫      Π
+𝜬      Ρ
+𝜮      Σ
+𝜯      Τ
+𝜰      Υ
+𝜱      Φ
+𝜲      Χ
+𝜳      Ψ
+𝜴      Ω
+𝜶      α
+𝜷      β
+𝜸      γ
+𝜹      δ
+𝜺      ε
+𝜻      ζ
+𝜼      η
+𝜽      θ
+𝜾      ι
+𝜿      κ
+𝝀      λ
+𝝁      μ
+𝝂      ν
+𝝃      ξ
+𝝄      ο
+𝝅      π
+𝝆      ρ
+𝝇      ς
+𝝈      σ
+𝝉      τ
+𝝊      υ
+𝝋      φ
+𝝌      χ
+𝝍      ψ
+𝝎      ω
+𝝖      Α
+𝝗      Β
+𝝘      Γ
+𝝙      Δ
+𝝚      Ε
+𝝛      Ζ
+𝝜      Η
+𝝝      Θ
+𝝞      Ι
+𝝟      Κ
+𝝠      Λ
+𝝡      Μ
+𝝢      Ν
+𝝣      Ξ
+𝝤      Ο
+𝝥      Π
+𝝦      Ρ
+𝝨      Σ
+𝝩      Τ
+𝝪      Υ
+𝝫      Φ
+𝝬      Χ
+𝝭      Ψ
+𝝮      Ω
+𝝰      α
+𝝱      β
+𝝲      γ
+𝝳      δ
+𝝴      ε
+𝝵      ζ
+𝝶      η
+𝝷      θ
+𝝸      ι
+𝝹      κ
+𝝺      λ
+𝝻      μ
+𝝼      ν
+𝝽      ξ
+𝝾      ο
+𝝿      π
+𝞀      ρ
+𝞁      ς
+𝞂      σ
+𝞃      τ
+𝞄      υ
+𝞅      φ
+𝞆      χ
+𝞇      ψ
+𝞈      ω
+𝞐      Α
+𝞑      Β
+𝞒      Γ
+𝞓      Δ
+𝞔      Ε
+𝞕      Ζ
+𝞖      Η
+𝞗      Θ
+𝞘      Ι
+𝞙      Κ
+𝞚      Λ
+𝞛      Μ
+𝞜      Ν
+𝞝      Ξ
+𝞞      Ο
+𝞟      Π
+𝞠      Ρ
+𝞢      Σ
+𝞣      Τ
+𝞤      Υ
+𝞥      Φ
+𝞦      Χ
+𝞧      Ψ
+𝞨      Ω
+𝞪      α
+𝞫      β
+𝞬      γ
+𝞭      δ
+𝞮      ε
+𝞯      ζ
+𝞰      η
+𝞱      θ
+𝞲      ι
+𝞳      κ
+𝞴      λ
+𝞵      μ
+𝞶      ν
+𝞷      ξ
+𝞸      ο
+𝞹      π
+𝞺      ρ
+𝞻      ς
+𝞼      σ
+𝞽      τ
+𝞾      υ
+𝞿      φ
+𝟀      χ
+𝟁      ψ
+𝟂      ω
 🄀      0.
 🄁      0,
 🄂      1,
diff --git a/contrib/unaccent/generate_unaccent_rules.py 
b/contrib/unaccent/generate_unaccent_rules.py
index 71932c8224..bb797fc954 100644
--- a/contrib/unaccent/generate_unaccent_rules.py
+++ b/contrib/unaccent/generate_unaccent_rules.py
@@ -208,8 +208,8 @@ def special_cases():
     """Returns the special cases which are not handled by other methods"""
     charactersDict = {}
 
-    charactersDict[0x2103] = "\xb0C"   # DEGREE CELSIUS
-    charactersDict[0x2109] = "\xb0F"   # DEGREE FAHRENHEIT
+    # Template example (already unnecessary):
+    #charactersDict[0x2103] = "\xb0C"   # DEGREE CELSIUS
 
     return charactersDict
 
@@ -252,6 +252,12 @@ def main(args):
                 charactersDict[codepoint.id] = 
"".join(chr(combining_codepoint.id)
                                            for combining_codepoint
                                            in get_plain_letters(codepoint, 
table))
+        elif (codepoint.general_category.startswith('N') or 
codepoint.general_category.startswith('So')) and \
+           len(codepoint.combining_ids) > 0 and \
+           args.noLigaturesExpansion is False and is_ligature(codepoint, 
table):
+            charactersDict[codepoint.id] = "".join(chr(combining_codepoint.id)
+                                       for combining_codepoint
+                                       in get_plain_letters(codepoint, table))
         elif is_mark_to_remove(codepoint):
             charactersDict[codepoint.id] = None
 
diff --git a/contrib/unaccent/unaccent.rules b/contrib/unaccent/unaccent.rules
index 3a5d35627a..9013014d2f 100644
--- a/contrib/unaccent/unaccent.rules
+++ b/contrib/unaccent/unaccent.rules
@@ -5,7 +5,10 @@
 ­      -
 ®      (R)
 ±      +/-
+²      2
+³      3
 µ      μ
+¹      1
 º      o
 »      >>
 ¼       1/4
@@ -1207,8 +1210,25 @@
 ⁈      ?!
 ⁉      !?
 ⁎      *
+⁰      0
 ⁱ      i
+⁴      4
+⁵      5
+⁶      6
+⁷      7
+⁸      8
+⁹      9
 ⁿ      n
+₀      0
+₁      1
+₂      2
+₃      3
+₄      4
+₅      5
+₆      6
+₇      7
+₈      8
+₉      9
 ₐ      a
 ₑ      e
 ₒ      o
@@ -1261,7 +1281,9 @@
 ℜ      R
 ℝ      R
 ℞      Rx
+℠      SM
 ℡      TEL
+™      TM
 ℤ      Z
 Ω      Ω
 ℨ      Z
@@ -1341,6 +1363,26 @@
 ∥      ||
 ≪      <<
 ≫      >>
+①      1
+②      2
+③      3
+④      4
+⑤      5
+⑥      6
+⑦      7
+⑧      8
+⑨      9
+⑩      10
+⑪      11
+⑫      12
+⑬      13
+⑭      14
+⑮      15
+⑯      16
+⑰      17
+⑱      18
+⑲      19
+⑳      20
 ⑴      (1)
 ⑵      (2)
 ⑶      (3)
@@ -1407,6 +1449,59 @@
 ⒳      (x)
 ⒴      (y)
 ⒵      (z)
+Ⓐ      A
+Ⓑ      B
+Ⓒ      C
+Ⓓ      D
+Ⓔ      E
+Ⓕ      F
+Ⓖ      G
+Ⓗ      H
+Ⓘ      I
+Ⓙ      J
+Ⓚ      K
+Ⓛ      L
+Ⓜ      M
+Ⓝ      N
+Ⓞ      O
+Ⓟ      P
+Ⓠ      Q
+Ⓡ      R
+Ⓢ      S
+Ⓣ      T
+Ⓤ      U
+Ⓥ      V
+Ⓦ      W
+Ⓧ      X
+Ⓨ      Y
+Ⓩ      Z
+ⓐ      a
+ⓑ      b
+ⓒ      c
+ⓓ      d
+ⓔ      e
+ⓕ      f
+ⓖ      g
+ⓗ      h
+ⓘ      i
+ⓙ      j
+ⓚ      k
+ⓛ      l
+ⓜ      m
+ⓝ      n
+ⓞ      o
+ⓟ      p
+ⓠ      q
+ⓡ      r
+ⓢ      s
+ⓣ      t
+ⓤ      u
+ⓥ      v
+ⓦ      w
+ⓧ      x
+ⓨ      y
+ⓩ      z
+⓪      0
 ⦅      ((
 ⦆      ))
 ⩴      ::=
@@ -1451,6 +1546,41 @@
 〛      ]
 〝      "
 〞      "
+㉐      PTE
+㉑      21
+㉒      22
+㉓      23
+㉔      24
+㉕      25
+㉖      26
+㉗      27
+㉘      28
+㉙      29
+㉚      30
+㉛      31
+㉜      32
+㉝      33
+㉞      34
+㉟      35
+㊱      36
+㊲      37
+㊳      38
+㊴      39
+㊵      40
+㊶      41
+㊷      42
+㊸      43
+㊹      44
+㊺      45
+㊻      46
+㊼      47
+㊽      48
+㊾      49
+㊿      50
+㋌      Hg
+㋍      erg
+㋎      eV
+㋏      LTD
 ㍱      hPa
 ㍲      da
 ㍳      AU
@@ -1461,6 +1591,7 @@
 ㍺      IU
 ㎀      pA
 ㎁      nA
+㎂      μA
 ㎃      mA
 ㎄      kA
 ㎅      KB
@@ -1470,6 +1601,8 @@
 ㎉      kcal
 ㎊      pF
 ㎋      nF
+㎌      μF
+㎍      μg
 ㎎      mg
 ㎏      kg
 ㎐      Hz
@@ -1479,6 +1612,7 @@
 ㎔      THz
 ㎙      fm
 ㎚      nm
+㎛      μm
 ㎜      mm
 ㎝      cm
 ㎞      km
@@ -1491,17 +1625,22 @@
 ㎮      rad/s
 ㎰      ps
 ㎱      ns
+㎲      μs
 ㎳      ms
 ㎴      pV
 ㎵      nV
+㎶      μV
 ㎷      mV
 ㎸      kV
 ㎹      MV
 ㎺      pW
 ㎻      nW
+㎼      μW
 ㎽      mW
 ㎾      kW
 ㎿      MW
+㏀      kΩ
+㏁      MΩ
 ㏂      a.m.
 ㏃      Bq
 ㏄      cc
@@ -1532,6 +1671,7 @@
 ㏝      Wb
 ㏞      V/m
 ㏟      A/m
+㏿      gal
 ꚜ      ъ
 ꚝ      ь
 ꜰ      F
@@ -2667,6 +2807,56 @@
 𝟀      χ
 𝟁      ψ
 𝟂      ω
+𝟎      0
+𝟏      1
+𝟐      2
+𝟑      3
+𝟒      4
+𝟓      5
+𝟔      6
+𝟕      7
+𝟖      8
+𝟗      9
+𝟘      0
+𝟙      1
+𝟚      2
+𝟛      3
+𝟜      4
+𝟝      5
+𝟞      6
+𝟟      7
+𝟠      8
+𝟡      9
+𝟢      0
+𝟣      1
+𝟤      2
+𝟥      3
+𝟦      4
+𝟧      5
+𝟨      6
+𝟩      7
+𝟪      8
+𝟫      9
+𝟬      0
+𝟭      1
+𝟮      2
+𝟯      3
+𝟰      4
+𝟱      5
+𝟲      6
+𝟳      7
+𝟴      8
+𝟵      9
+𝟶      0
+𝟷      1
+𝟸      2
+𝟹      3
+𝟺      4
+𝟻      5
+𝟼      6
+𝟽      7
+𝟾      8
+𝟿      9
 🄀      0.
 🄁      0,
 🄂      1,
@@ -2704,3 +2894,53 @@
 🄧      (X)
 🄨      (Y)
 🄩      (Z)
+🄫      C
+🄬      R
+🄭      CD
+🄮      WZ
+🄰      A
+🄱      B
+🄲      C
+🄳      D
+🄴      E
+🄵      F
+🄶      G
+🄷      H
+🄸      I
+🄹      J
+🄺      K
+🄻      L
+🄼      M
+🄽      N
+🄾      O
+🄿      P
+🅀      Q
+🅁      R
+🅂      S
+🅃      T
+🅄      U
+🅅      V
+🅆      W
+🅇      X
+🅈      Y
+🅉      Z
+🅊      HV
+🅋      MV
+🅌      SD
+🅍      SS
+🅎      PPV
+🅏      WC
+🅪      MC
+🅫      MD
+🅬      MR
+🆐      DJ
+🯰      0
+🯱      1
+🯲      2
+🯳      3
+🯴      4
+🯵      5
+🯶      6
+🯷      7
+🯸      8
+🯹      9

Reply via email to