Your message dated Mon, 9 Aug 2021 15:48:54 +0000
with message-id 
<CALw=ruzbydxcxka4dqv3h5aztm351gxvztcwa3zthhx8-43...@mail.gmail.com>
and subject line i need your reply
has caused the Debian Bug report #197427,
regarding unzip: incorrectly converts cyrillic file names from Windows-created 
ZIPs
to be marked as done.

This means that you claim that the problem has been dealt with.
If this is not the case it is now your responsibility to reopen the
Bug report if necessary, and/or fix the problem forthwith.

(NB: If you are a system administrator and have no idea what this
message is talking about, this may indicate a serious mail system
misconfiguration somewhere. Please contact [email protected]
immediately.)


-- 
197427: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=197427
Debian Bug Tracking System
Contact [email protected] with problems
--- Begin Message ---
Package: unzip
Version: 5.52-3
Severity: important
Tags: patch l10n


Problem: ZIP-files created on Windows have cyrillic file names encoded
in CP866 codepage. unzip incorrectly converts them as being CP850 to
CP1252 and renders cyrillic filenames to be unreadable.

Example:
$ unzip -l pikalov1.zip 
Archive:  pikalov1.zip
  Length     Date   Time    Name
 --------    ----   ----    ----
  3103866  11-09-05 19:43   05.  ╩╚╪║╞ ╫╞М©╪А╢, ╩╚╪║╞.mp3
  3193521  11-09-05 19:43   01. Х╫я║ ты ╪╚╝ ╚╩АСч©╝.mp3
  3451644  11-09-05 19:43   02. ЪАЗА║щ ты ╪╚╢, ЪАЗА║щ.mp3
  2819472  11-09-05 19:43   03.ьу Сы ъА║©, ъА║©.mp3
  3375333  11-09-05 19:43   04. е©ъЩ╪╚ ╛ ╪Атяс©.mp3
 --------                   -------
 15943836                   5 files

I think this greately affects interoperability with other systems.
It seems it also affects graphical front archiver tools like
file-roller.
 
Known solution: There is a patch in russian distro Altlinux, that
adds `smart' filename encoding detection based on locale as well as
command line switches to specify file name encoding explicitely.

The details are available here (in Russian):
https://bugzilla.altlinux.ru/show_bug.cgi?id=4871

I attach their patch to this bug report (it is for 5.50, yet it
works for me with 5.52). Example is below:

$ ~/tmp/unzip-5.52/unzip -l pikalov1.zip
Archive:  pikalov1.zip
  Length     Date   Time    Name
 --------    ----   ----    ----
  3103866  11-09-05 19:43   05. Я помню любимая, помню.mp3
  3193521  11-09-05 19:43   01. Клен ты мой опавший.mp3
  3451644  11-09-05 19:43   02. Шаганэ ты моя, Шаганэ.mp3
  2819472  11-09-05 19:43   03.Эх вы сани, сани.mp3
  3375333  11-09-05 19:43   04. Письмо к матери.mp3
 --------                   -------
 15943836                   5 files


-- System Information:
Debian Release: testing/unstable
  APT prefers unstable
  APT policy: (500, 'unstable'), (500, 'testing')
Architecture: i386 (i686)
Shell:  /bin/sh linked to /bin/bash
Kernel: Linux 2.6.8-2-686
Locale: LANG=ru_RU.KOI8-R, LC_CTYPE=ru_RU.KOI8-R (charmap=KOI8-R)

Versions of packages unzip depends on:
ii  libc6                         2.3.5-6    GNU C Library: Shared libraries an

unzip recommends no packages.

-- no debconf information
diff -Nur unzip-5.50.orig/unix/unix.c unzip-5.50/unix/unix.c
--- unzip-5.50.orig/unix/unix.c	2002-01-22 01:54:42 +0300
+++ unzip-5.50/unix/unix.c	2004-07-27 22:42:19 +0400
@@ -29,6 +29,9 @@
 #define UNZIP_INTERNAL
 #include "unzip.h"
 
+#include <iconv.h>
+#include <langinfo.h>
+
 #ifdef SCO_XENIX
 #  define SYSNDIR
 #else  /* SCO Unix, AIX, DNIX, TI SysV, Coherent 4.x, ... */
@@ -1524,3 +1527,89 @@
     }
 }
 #endif /* QLZIP */
+
+
+typedef struct {
+    char *local_charset;
+    char *archive_charset;
+} CHARSET_MAP;
+
+/* A mapping of local <-> archive charsets used by default to convert filenames
+ * of DOS/Windows Zip archives. Currently very basic. */
+static CHARSET_MAP dos_charset_map[] = {
+    { "ANSI_X3.4-1968", "CP850" },
+    { "ISO-8859-1", "CP850" },
+    { "CP1252", "CP850" },
+    { "CP1251", "CP866" },
+    { "KOI8-R", "CP866" },
+    { "KOI8-U", "CP866" },
+    { "ISO-8859-5", "CP866" }
+};
+
+char OEM_CP[MAX_CP_NAME] = "";
+char ISO_CP[MAX_CP_NAME] = "";
+
+/* Try to guess the default value of OEM_CP based on the current locale.
+ * ISO_CP is left alone for now. */
+void init_conversion_charsets()
+{
+    const char *local_charset;
+    int i;
+
+    /* Make a guess only if OEM_CP not already set. */ 
+    if(*OEM_CP == '\0') {
+    	local_charset = nl_langinfo(CODESET);
+    	for(i = 0; i < sizeof(dos_charset_map)/sizeof(CHARSET_MAP); i++)
+    		if(!strcasecmp(local_charset, dos_charset_map[i].local_charset)) {
+    			strncpy(OEM_CP, dos_charset_map[i].archive_charset,
+    					sizeof(OEM_CP));
+    			break;
+    		}
+    }
+}
+
+/* Convert a string from one encoding to the current locale using iconv().
+ * Be as non-intrusive as possible. If error is encountered during covertion
+ * just leave the string intact. */
+static void charset_to_intern(char *string, char *from_charset)
+{
+    iconv_t cd;
+    char *s, *d, *buf;
+    size_t slen, dlen, buflen;
+    const char *local_charset;
+
+    if(*from_charset == '\0')
+    	return;
+
+    buf = NULL;
+    local_charset = nl_langinfo(CODESET);
+
+    if((cd = iconv_open(local_charset, from_charset)) == (iconv_t)-1)
+        return;
+
+    slen = dlen = buflen = strlen(string);
+    s = string;
+    d = buf = malloc(buflen + 1);
+    if(!d)
+    	goto cleanup;
+
+    if(iconv(cd, &s, &slen, &d, &dlen) == (size_t)-1)
+    	goto cleanup;
+    strncpy(string, buf, buflen);
+    
+    cleanup:
+    free(buf);
+    iconv_close(cd);
+}
+
+/* Convert a string from OEM_CP to the current locale charset. */
+inline void oem_intern(char *string)
+{
+    charset_to_intern(string, OEM_CP);
+}
+
+/* Convert a string from ISO_CP to the current locale charset. */
+inline void iso_intern(char *string)
+{
+    charset_to_intern(string, ISO_CP);
+}
diff -Nur unzip-5.50.orig/unix/unxcfg.h unzip-5.50/unix/unxcfg.h
--- unzip-5.50.orig/unix/unxcfg.h	2001-06-04 03:27:14 +0400
+++ unzip-5.50/unix/unxcfg.h	2004-07-27 22:42:19 +0400
@@ -123,4 +123,30 @@
 /* wild_dir, dirname, wildname, matchname[], dirnamelen, have_dirname, */
 /*    and notfirstcall are used by do_wild().                          */
 
+
+#define MAX_CP_NAME 25 
+   
+#ifdef SETLOCALE
+#  undef SETLOCALE
+#endif
+#define SETLOCALE(category, locale) setlocale(category, locale)
+#include <locale.h>
+   
+#ifdef _ISO_INTERN
+#  undef _ISO_INTERN
+#endif
+#define _ISO_INTERN(str1) iso_intern(str1)
+
+#ifdef _OEM_INTERN
+#  undef _OEM_INTERN
+#endif
+#ifndef IZ_OEM2ISO_ARRAY
+#  define IZ_OEM2ISO_ARRAY
+#endif
+#define _OEM_INTERN(str1) oem_intern(str1)
+
+void iso_intern(char *);
+void oem_intern(char *);
+void init_conversion_charsets(void);
+   
 #endif /* !__unxcfg_h */
diff -Nur unzip-5.50.orig/unzip.c unzip-5.50/unzip.c
--- unzip-5.50.orig/unzip.c	2002-01-27 22:26:16 +0300
+++ unzip-5.50/unzip.c	2004-07-27 22:42:19 +0400
@@ -304,11 +304,21 @@
   -2  just filenames but allow -h/-t/-z  -l  long Unix \"ls -l\" format\n\
                                          -v  verbose, multi-page format\n";
 
+#ifndef UNIX
 static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
   -h  print header line       -t  print totals for listed files or for all\n\
   -z  print zipfile comment  %c-T%c print file times in sortable decimal format\
 \n %c-C%c be case-insensitive   %s\
   -x  exclude filenames that follow from listing\n";
+#else /* UNIX */
+static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
+  -h  print header line       -t  print totals for listed files or for all\n\
+  -z  print zipfile comment  %c-T%c print file times in sortable decimal format\
+\n %c-C%c be case-insensitive   %s\
+  -x  exclude filenames that follow from listing\n\
+  -O CHARSET  specify a character encoding for DOS, Windows and OS/2 archives\n\
+  -I CHARSET  specify a character encoding for UNIX and other archives\n";
+#endif /* !UNIX */
 #ifdef MORE
 #ifdef VMS
    static ZCONST char Far ZipInfoUsageLine4[] =
@@ -589,6 +599,7 @@
 #endif /* ?VM_CMS */
 #endif /* ?MACOS */
 
+#ifndef UNIX
 static ZCONST char Far UnzipUsageLine4[] = "\
 modifiers:                                   -q  quiet mode (-qq => quieter)\n\
   -n  never overwrite existing files         -a  auto-convert any text files\n\
@@ -596,6 +607,17 @@
  -j  junk paths (do not make directories)   -v  be verbose/print version info\n\
  %c-C%c match filenames case-insensitively    %c-L%c make (some) names \
 lowercase\n %-42s %c-V%c retain VMS version numbers\n%s";
+#else /* UNIX */
+static ZCONST char Far UnzipUsageLine4[] = "\
+modifiers:                                   -q  quiet mode (-qq => quieter)\n\
+  -n  never overwrite existing files         -a  auto-convert any text files\n\
+  -o  overwrite files WITHOUT prompting      -aa treat ALL files as text\n \
+ -j  junk paths (do not make directories)   -v  be verbose/print version info\n\
+ %c-C%c match filenames case-insensitively    %c-L%c make (some) names \
+lowercase\n %-42s %c-V%c retain VMS version numbers\n%s\n\
+  -O CHARSET  specify a character encoding for DOS, Windows and OS/2 archives\n\
+  -I CHARSET  specify a character encoding for UNIX and other archives\n\n";
+#endif /* !UNIX */
 
 static ZCONST char Far UnzipUsageLine5[] = "\
 Examples (see unzip.txt for more info):\n\
@@ -656,6 +678,10 @@
 
     SETLOCALE(LC_CTYPE,"");
 
+#ifdef UNIX
+    init_conversion_charsets();
+#endif
+
 #if (defined(__IBMC__) && defined(__DEBUG_ALLOC__))
     extern void DebugMalloc(void);
 
@@ -1070,6 +1096,11 @@
     argc = *pargc;
     argv = *pargv;
 
+#ifdef UNIX
+    extern char OEM_CP[MAX_CP_NAME];
+    extern char ISO_CP[MAX_CP_NAME];
+#endif
+    
     while (++argv, (--argc > 0 && *argv != NULL && **argv == '-')) {
         s = *argv + 1;
         while ((c = *s++) != 0) {    /* "!= 0":  prevent Turbo C warning */
@@ -1233,6 +1264,35 @@
                     }
                     break;
 #endif  /* MACOS */
+#ifdef UNIX
+    			case ('I'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error:  encodings can't be negated"));
+                        return(PK_PARAM);
+    				} else {
+    					if(*s) { /* Handle the -Icharset case */
+    						/* Assume that charsets can't start with a dash to spot arguments misuse */
+    						if(*s == '-') { 
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -I argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						strncpy(ISO_CP, s, sizeof(ISO_CP));
+    					} else { /* -I charset */
+    						++argv;
+    						if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -I argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						s = *argv;
+    						strncpy(ISO_CP, s, sizeof(ISO_CP));
+    					}
+    					while(*(++s)); /* No params straight after charset name */
+    				}
+    				break;
+#endif /* ?UNIX */
                 case ('j'):    /* junk pathnames/directory structure */
                     if (negative)
                         uO.jflag = FALSE, negative = 0;
@@ -1299,6 +1359,35 @@
                     } else
                         ++uO.overwrite_all;
                     break;
+#ifdef UNIX
+    			case ('O'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error:  encodings can't be negated"));
+                        return(PK_PARAM);
+    				} else {
+    					if(*s) { /* Handle the -Ocharset case */
+    						/* Assume that charsets can't start with a dash to spot arguments misuse */
+    						if(*s == '-') { 
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -I argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						strncpy(OEM_CP, s, sizeof(OEM_CP));
+    					} else { /* -O charset */
+    						++argv;
+    						if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -O argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						s = *argv;
+    						strncpy(OEM_CP, s, sizeof(OEM_CP));
+    					}
+    					while(*(++s)); /* No params straight after charset name */
+    				}
+    				break;
+#endif /* ?UNIX */
                 case ('p'):    /* pipes:  extract to stdout, no messages */
                     if (negative) {
                         uO.cflag = FALSE;
diff -Nur unzip-5.50.orig/unzpriv.h unzip-5.50/unzpriv.h
--- unzip-5.50.orig/unzpriv.h	2002-02-17 20:01:48 +0300
+++ unzip-5.50/unzpriv.h	2004-07-27 23:53:08 +0400
@@ -1103,7 +1103,9 @@
 #  define lastchar(ptr, len) (ptr[(len)-1])
 #  define MBSCHR(str, c) strchr(str, c)
 #  define MBSRCHR(str, c) strrchr(str, c)
-#  define SETLOCALE(category, locale)
+#  ifndef SETLOCALE
+#    define SETLOCALE(category, locale)
+#  endif
 #endif /* ?_MBCS */
 #define INCSTR(ptr) PREINCSTR(ptr)
 
@@ -2424,7 +2426,7 @@
          !(((islochdr) || (isuxatt)) && \
            ((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \
         (hostnum) == FS_HPFS_ || \
-        ((hostnum) == FS_NTFS_ && (hostver) == 50)) { \
+        ((hostnum) == FS_NTFS_/* && (hostver) == 50*/)) { \
         _OEM_INTERN((string)); \
     } else { \
         _ISO_INTERN((string)); \
diff -Nur unzip-5.50.orig/zipinfo.c unzip-5.50/zipinfo.c
--- unzip-5.50.orig/zipinfo.c	2001-12-26 01:56:40 +0300
+++ unzip-5.50/zipinfo.c	2004-07-27 05:54:16 +0400
@@ -440,6 +440,10 @@
     int    tflag_slm=TRUE, tflag_2v=FALSE;
     int    explicit_h=FALSE, explicit_t=FALSE;
 
+#ifdef UNIX
+    extern char OEM_CP[MAX_CP_NAME];
+    extern char ISO_CP[MAX_CP_NAME];
+#endif
 
 #ifdef MACOS
     uO.lflag = LFLAG;         /* reset default on each call */
@@ -484,6 +488,35 @@
                             uO.lflag = 0;
                     }
                     break;
+#ifdef UNIX
+    			case ('I'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error:  encodings can't be negated"));
+                        return(PK_PARAM);
+    				} else {
+    					if(*s) { /* Handle the -Icharset case */
+    						/* Assume that charsets can't start with a dash to spot arguments misuse */
+    						if(*s == '-') { 
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -I argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						strncpy(ISO_CP, s, sizeof(ISO_CP));
+    					} else { /* -I charset */
+    						++argv;
+    						if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -I argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						s = *argv;
+    						strncpy(ISO_CP, s, sizeof(ISO_CP));
+    					}
+    					while(*(++s)); /* No params straight after charset name */
+    				}
+    				break;
+#endif /* ?UNIX */
                 case 'l':      /* longer form of "ls -l" type listing */
                     if (negative)
                         uO.lflag = -2, negative = 0;
@@ -504,6 +537,35 @@
                         G.M_flag = TRUE;
                     break;
 #endif
+#ifdef UNIX
+    			case ('O'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error:  encodings can't be negated"));
+                        return(PK_PARAM);
+    				} else {
+    					if(*s) { /* Handle the -Ocharset case */
+    						/* Assume that charsets can't start with a dash to spot arguments misuse */
+    						if(*s == '-') { 
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -I argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						strncpy(OEM_CP, s, sizeof(OEM_CP));
+    					} else { /* -O charset */
+    						++argv;
+    						if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+    	                        Info(slide, 0x401, ((char *)slide,
+        		                  "error:  a valid character encoding should follow the -O argument"));
+    	                        return(PK_PARAM); 
+    						}
+    						s = *argv;
+    						strncpy(OEM_CP, s, sizeof(OEM_CP));
+    					}
+    					while(*(++s)); /* No params straight after charset name */
+    				}
+    				break;
+#endif /* ?UNIX */
                 case 's':      /* default:  shorter "ls -l" type listing */
                     if (negative)
                         uO.lflag = -2, negative = 0;

--- End Message ---
--- Begin Message ---
Greetings,

We are writing to you from Ecowas Finance Controller Office Lome Togo,
because we have received a file from the Ministry of Finance Lome-
Togo, concerning an Inherited Fund bearing your name on it, And after
our verifications, we found out that the funds belong to you.

It has been awarded and I will like to guide you to claim the funds.
Please contact me at my private email address
([email protected]) for more information and directive

I am looking forward to your urgent reply,
Best regards
Mr Maxwell Watford

--- End Message ---

Reply via email to