Hi all,

attached is a patch (plus 2 sourcefiles), which adds utf-8 support and
a better (i.e. more generic) charset conversion:

- Define a type cdio_utf8_t (== char), which denotes UTF-8 strings.

- Routines for converting character sets: either using a character
  set converter (which can be reused for multiple strings) or via the
  functions cdio_charset_[from|to]_utf8(). These routines catch the
  E2BIG error and reallocate the returned string, so you'll never need
  to know the maximum length of the destination in advance.

- Removed all occurrences of iconv from iso9660_fs.c and replaced them
  by cdio_charset_to_utf8().

The files utf8.h and utf8.c belong to include/cdio/ and lib/driver/
respectively.
Testing this can be done with iso-info and a Joliet CDROM.
If there are no major objections, I'll commit this by the weekend.

Cheers

Burkhard

Index: include/cdio/Makefile.am
===================================================================
RCS file: /sources/libcdio/libcdio/include/cdio/Makefile.am,v
retrieving revision 1.30
diff -u -r1.30 Makefile.am
--- include/cdio/Makefile.am    27 Feb 2006 10:10:08 -0000      1.30
+++ include/cdio/Makefile.am    17 May 2006 19:36:03 -0000
@@ -50,6 +50,7 @@
        udf.h \
        udf_file.h \
        udf_time.h \
+       utf8.h \
        util.h \
        version.h \
        xa.h 
Index: include/cdio/iso9660.h
===================================================================
RCS file: /sources/libcdio/libcdio/include/cdio/iso9660.h,v
retrieving revision 1.93
diff -u -r1.93 iso9660.h
--- include/cdio/iso9660.h      6 May 2006 16:08:06 -0000       1.93
+++ include/cdio/iso9660.h      17 May 2006 19:36:06 -0000
@@ -916,7 +916,7 @@
   is some problem in getting this and false is returned.
 */
 bool iso9660_ifs_get_application_id(iso9660_t *p_iso,
-                                    /*out*/ char **p_psz_app_id);
+                                    /*out*/ cdio_utf8_t **p_psz_app_id);
 
 /*!  
   Return the Joliet level recognized for p_iso.
@@ -954,7 +954,7 @@
     is some problem in getting this and false is returned.
   */
   bool iso9660_ifs_get_preparer_id(iso9660_t *p_iso,
-                                   /*out*/ char **p_psz_preparer_id);
+                                   /*out*/ cdio_utf8_t **p_psz_preparer_id);
   
   /*!
     Return a string containing the PVD's publisher id with trailing
@@ -967,7 +967,7 @@
     is some problem in getting this and false is returned.
   */
   bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso,
-                                    /*out*/ char **p_psz_publisher_id);
+                                    /*out*/ cdio_utf8_t **p_psz_publisher_id);
   
   uint8_t iso9660_get_pvd_type(const iso9660_pvd_t *p_pvd);
   
@@ -993,7 +993,7 @@
     is some problem in getting this and false is returned.
   */
   bool iso9660_ifs_get_system_id(iso9660_t *p_iso,
-                                 /*out*/ char **p_psz_system_id);
+                                 /*out*/ cdio_utf8_t **p_psz_system_id);
   
   
   /*! Return the LSN of the root directory for pvd.
@@ -1012,7 +1012,7 @@
     is some problem in getting this and false is returned.
   */
   bool iso9660_ifs_get_volume_id(iso9660_t *p_iso,
-                                 /*out*/ char **p_psz_volume_id);
+                                 /*out*/ cdio_utf8_t **p_psz_volume_id);
   
   /*!
     Return the volumeset ID in the PVD.
@@ -1025,7 +1025,7 @@
     is some problem in getting this and false is returned.
   */
   bool iso9660_ifs_get_volumeset_id(iso9660_t *p_iso,
-                                    /*out*/ char **p_psz_volumeset_id);
+                                    /*out*/ cdio_utf8_t **p_psz_volumeset_id);
   
   /* pathtable */
   
Index: include/cdio/types.h
===================================================================
RCS file: /sources/libcdio/libcdio/include/cdio/types.h,v
retrieving revision 1.35
diff -u -r1.35 types.h
--- include/cdio/types.h        23 Jan 2006 20:47:33 -0000      1.35
+++ include/cdio/types.h        17 May 2006 19:36:07 -0000
@@ -202,7 +202,15 @@
   typedef struct msf_s msf_t;
 
 #define msf_t_SIZEOF 3
-  
+
+  /*!
+    \brief UTF-8 char definition 
+
+    Type to denote UTF-8 strings.
+  */
+
+  typedef char cdio_utf8_t;
+
   typedef enum  {
     nope  = 0,
     yep   = 1,
Index: lib/driver/Makefile.am
===================================================================
RCS file: /sources/libcdio/libcdio/lib/driver/Makefile.am,v
retrieving revision 1.15
diff -u -r1.15 Makefile.am
--- lib/driver/Makefile.am      14 Mar 2006 12:05:16 -0000      1.15
+++ lib/driver/Makefile.am      17 May 2006 19:36:07 -0000
@@ -94,6 +94,7 @@
        sector.c \
        solaris.c \
        track.c \
+       utf8.c \
        util.c
 
 lib_LTLIBRARIES    = libcdio.la
Index: lib/driver/libcdio.sym
===================================================================
RCS file: /sources/libcdio/libcdio/lib/driver/libcdio.sym,v
retrieving revision 1.35
diff -u -r1.35 libcdio.sym
--- lib/driver/libcdio.sym      15 Apr 2006 03:05:14 -0000      1.35
+++ lib/driver/libcdio.sym      17 May 2006 19:36:07 -0000
@@ -205,3 +205,8 @@
 mmc_start_stop_media
 mmc_timeout_ms
 track_format2str
+cdio_charset_converter_create
+cdio_charset_converter_destroy
+cdio_charset_convert
+cdio_charset_from_utf8
+cdio_charset_to_utf8
Index: lib/iso9660/iso9660_fs.c
===================================================================
RCS file: /sources/libcdio/libcdio/lib/iso9660/iso9660_fs.c,v
retrieving revision 1.35
diff -u -r1.35 iso9660_fs.c
--- lib/iso9660/iso9660_fs.c    17 Mar 2006 22:36:31 -0000      1.35
+++ lib/iso9660/iso9660_fs.c    17 May 2006 19:36:10 -0000
@@ -33,10 +33,6 @@
 # include <errno.h>
 #endif
 
-#ifdef HAVE_ICONV
-# include <iconv.h>
-#endif
-
 #ifdef HAVE_LANGINFO_CODESET
 #include <langinfo.h>
 #endif
@@ -45,6 +41,7 @@
 #include <cdio/bytesex.h>
 #include <cdio/iso9660.h>
 #include <cdio/util.h>
+#include <cdio/utf8.h>
 
 /* Private headers */
 #include "cdio_assert.h"
@@ -277,71 +274,13 @@
   return true;
 }
 
-#ifdef HAVE_JOLIET
-static bool
-ucs2be_to_locale(ICONV_CONST char *psz_ucs2be,  size_t i_inlen, 
-                char **p_psz_out,  size_t i_outlen)
-{
-
-  iconv_t ic = 
-#if defined(HAVE_LANGINFO_CODESET)
-    iconv_open(nl_langinfo(CODESET), "UCS-2BE");
-#else 
-    iconv_open("ASCII", "UCS-2BE");
-#endif
-
-  int rc;
-  char *psz_buf = NULL;
-  char *psz_buf2;
-  int i_outlen_max = i_outlen;
-  int i_outlen_actual;
-
-  if (-1 == (size_t) ic) {
-#if defined(HAVE_LANGINFO_CODESET)
-    cdio_info("Failed to get conversion table for locale, trying ASCII");
-    ic = iconv_open("ASCII", "UCS-2BE");
-    if (-1 == (size_t) ic) {
-      cdio_info("Failed to get conversion table for ASCII too");
-      return false;
-    }
-#else 
-    cdio_info("Failed to get conversion table for locale");
-    return false;
-#endif
-  }
-  
-  psz_buf = (char *) realloc(psz_buf, i_outlen);
-  psz_buf2 = psz_buf;
-  if (!psz_buf) {
-    /* XXX: report out of memory error */
-    goto error;
-  }
-  rc = iconv(ic, &psz_ucs2be, &i_inlen, &psz_buf2, &i_outlen);
-  iconv_close(ic);
-  if ((rc == -1) && (errno != E2BIG)) {
-    /* conversion failed */
-    goto error;
-  }
-  i_outlen_actual = i_outlen_max - i_outlen;
-  *p_psz_out = malloc(i_outlen_actual + 1);
-  memcpy(*p_psz_out, psz_buf, i_outlen_actual);
-  *(*p_psz_out + i_outlen_actual) = '\0';
-  free(psz_buf);
-  return true;
- error:
-  free(psz_buf);
-  *p_psz_out = NULL; 
-  return false;
-}
-#endif /*HAVE_JOLIET*/
-
 /*!  
   Return the application ID.  NULL is returned in psz_app_id if there
   is some problem in getting this.
 */
 bool
 iso9660_ifs_get_application_id(iso9660_t *p_iso, 
-                              /*out*/ char **p_psz_app_id)
+                              /*out*/ cdio_utf8_t **p_psz_app_id)
 {
   if (!p_iso) {
     *p_psz_app_id = NULL;
@@ -355,10 +294,9 @@
        longer results *and* have the same character using
        the PVD, do that.
      */
-    if ( ucs2be_to_locale(p_iso->svd.application_id, 
-                         ISO_MAX_APPLICATION_ID, 
-                         p_psz_app_id, 
-                         ISO_MAX_APPLICATION_ID))
+  if ( cdio_charset_to_utf8(p_iso->svd.application_id,
+                            ISO_MAX_APPLICATION_ID,
+                            p_psz_app_id, "UCS-2BE"))
       return true;
   }
 #endif /*HAVE_JOLIET*/ 
@@ -381,7 +319,7 @@
 */
 bool
 iso9660_ifs_get_preparer_id(iso9660_t *p_iso,
-                       /*out*/ char **p_psz_preparer_id)
+                       /*out*/ cdio_utf8_t **p_psz_preparer_id)
 {
   if (!p_iso) {
     *p_psz_preparer_id = NULL;
@@ -395,8 +333,8 @@
        longer results *and* have the same character using
        the PVD, do that.
      */
-    if ( ucs2be_to_locale(p_iso->svd.preparer_id, ISO_MAX_PREPARER_ID, 
-                         p_psz_preparer_id, ISO_MAX_PREPARER_ID) )
+    if ( cdio_charset_to_utf8(p_iso->svd.preparer_id, ISO_MAX_PREPARER_ID,
+                              p_psz_preparer_id, "UCS-2BE") )
       return true;
   }
 #endif /*HAVE_JOLIET*/
@@ -409,7 +347,7 @@
    blanks removed.
 */
 bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso,
-                                  /*out*/ char **p_psz_publisher_id)
+                                  /*out*/ cdio_utf8_t **p_psz_publisher_id)
 {
   if (!p_iso) {
     *p_psz_publisher_id = NULL;
@@ -423,8 +361,8 @@
        longer results *and* have the same character using
        the PVD, do that.
      */
-    if( ucs2be_to_locale(p_iso->svd.publisher_id, ISO_MAX_PUBLISHER_ID, 
-                        p_psz_publisher_id, ISO_MAX_PUBLISHER_ID) )
+    if( cdio_charset_to_utf8(p_iso->svd.publisher_id, ISO_MAX_PUBLISHER_ID,
+                             p_psz_publisher_id, "UCS-2BE") )
       return true;
   }
 #endif /*HAVE_JOLIET*/
@@ -438,7 +376,7 @@
    blanks removed.
 */
 bool iso9660_ifs_get_system_id(iso9660_t *p_iso,
-                              /*out*/ char **p_psz_system_id)
+                              /*out*/ cdio_utf8_t **p_psz_system_id)
 {
   if (!p_iso) {
     *p_psz_system_id = NULL;
@@ -452,8 +390,8 @@
        longer results *and* have the same character using
        the PVD, do that.
      */
-    if ( ucs2be_to_locale(p_iso->svd.system_id, ISO_MAX_SYSTEM_ID, 
-                         p_psz_system_id, ISO_MAX_SYSTEM_ID) )
+    if ( cdio_charset_to_utf8(p_iso->svd.system_id, ISO_MAX_SYSTEM_ID,
+                              p_psz_system_id, "UCS-2BE") )
       return true;
   }
 #endif /*HAVE_JOLIET*/
@@ -467,7 +405,7 @@
    blanks removed.
 */
 bool iso9660_ifs_get_volume_id(iso9660_t *p_iso,
-                              /*out*/ char **p_psz_volume_id)
+                              /*out*/ cdio_utf8_t **p_psz_volume_id)
 {
   if (!p_iso) {
     *p_psz_volume_id = NULL;
@@ -481,8 +419,8 @@
        longer results *and* have the same character using
        the PVD, do that.
      */
-    if ( ucs2be_to_locale(p_iso->svd.volume_id, ISO_MAX_VOLUME_ID, 
-                         p_psz_volume_id, ISO_MAX_VOLUME_ID) )
+    if ( cdio_charset_to_utf8(p_iso->svd.volume_id, ISO_MAX_VOLUME_ID,
+                              p_psz_volume_id, "UCS-2BE") )
       return true;
   }
 #endif /* HAVE_JOLIET */
@@ -496,7 +434,7 @@
    blanks removed.
 */
 bool iso9660_ifs_get_volumeset_id(iso9660_t *p_iso,
-                                 /*out*/ char **p_psz_volumeset_id)
+                                 /*out*/ cdio_utf8_t **p_psz_volumeset_id)
 {
   if (!p_iso) {
     *p_psz_volumeset_id = NULL;
@@ -510,10 +448,10 @@
        longer results *and* have the same character using
        the PVD, do that.
      */
-    if ( ucs2be_to_locale(p_iso->svd.volume_set_id, 
-                         ISO_MAX_VOLUMESET_ID, 
-                         p_psz_volumeset_id, 
-                         ISO_MAX_VOLUMESET_ID) )
+    if ( cdio_charset_to_utf8(p_iso->svd.volume_set_id, 
+                              ISO_MAX_VOLUMESET_ID, 
+                              p_psz_volumeset_id,
+                              "UCS-2BE") )
       return true;
   }
 #endif /*HAVE_JOLIET*/
@@ -843,10 +781,10 @@
 #ifdef HAVE_JOLIET
       else if (i_joliet_level) {
        int i_inlen = i_fname;
-       int i_outlen = (i_inlen / 2);
-       char *p_psz_out = NULL;
-       ucs2be_to_locale(p_iso9660_dir->filename, i_inlen, &p_psz_out, 
-                        i_outlen);
+       cdio_utf8_t *p_psz_out = NULL;
+       cdio_charset_to_utf8(p_iso9660_dir->filename, i_inlen,
+                             &p_psz_out, "UCS-2BE");
+        
        strncpy(p_stat->filename, p_psz_out, i_fname);
        free(p_psz_out);
       }
/*
    Copyright (C) 2006 Burkhard Plaum <[EMAIL PROTECTED]>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301 USA.
*/
/* UTF-8 support */


#include <cdio/types.h>

/** \brief Opaque character set converter
 *
 *  Handle type used by cdio_charset_converter_create(),
 *  cdio_charset_convert() and cdio_charset_converter_destroy().
 *  The structure itself is not defined in this header.
 *
 *  NOTE(review): the identifier is spelled "coverter" (sic); it is part
 *  of the public interface, so it is left unchanged here.
 */

typedef struct cdio_charset_coverter_s cdio_charset_coverter_t;

/** \brief Create a charset converter
 *  \param src_charset Name of the source character set
 *  \param dst_charset Name of the destination character set
 *  \returns A newly allocated charset converter
 *
 *  The returned converter can be used for multiple strings and must be
 *  released with cdio_charset_converter_destroy().
 *  Note that the argument order (source first, destination second) is
 *  the reverse of iconv_open().
 */

cdio_charset_coverter_t *
cdio_charset_converter_create(const char * src_charset,
                              const char * dst_charset);

/** \brief Destroy a character set converter
 *  \param cnv A character set converter obtained from
 *             cdio_charset_converter_create()
 *
 *  Closes the underlying conversion descriptor and frees the converter.
 *  The converter must not be used afterwards.
 */

void cdio_charset_converter_destroy(cdio_charset_coverter_t*cnv);

/** \brief Convert a string from one character set to another
 *  \param cnv A charset converter
 *  \param src Source string
 *  \param src_len Length of the source string in bytes
 *  \param dst Returns the destination string (0 terminated)
 *  \param dst_len If non NULL, returns the length of the destination
 *                 string in bytes, not counting the terminating 0
 *  \returns true if the conversion was successful, false otherwise.
 *
 *  The destination string must be freed by the caller with free().
 *  If you pass a negative value (e.g. -1) for src_len, strlen() will be
 *  used to determine the source length.
 */

bool cdio_charset_convert(cdio_charset_coverter_t*cnv,
                          char * src, int src_len,
                          char ** dst, int * dst_len);

/** \brief Convert a string from UTF-8 to another charset
 *  \param src Source string (0 terminated)
 *  \param dst Returns the destination string
 *  \param dst_len If non NULL, returns the length of the destination
 *                 string in bytes, not counting the terminating 0
 *  \param dst_charset The character set to convert to
 *  \returns true if the conversion was successful, false otherwise.
 *
 *  This is a convenience function, which creates a conversion
 *  descriptor, converts one string and closes the descriptor again.
 *  The destination string must be freed by the caller with free().
 */


bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
                            int * dst_len, const char * dst_charset);

/** \brief Convert a string from another charset to UTF-8 
 *  \param src Source string
 *  \param src_len Length of the source string in bytes
 *  \param dst Returns the destination string (0 terminated)
 *  \param src_charset The character set to convert from
 *  \returns true if the conversion was successful, false otherwise.
 *
 *  This is a convenience function, which creates a conversion
 *  descriptor, converts one string and closes the descriptor again.
 *  The destination string must be freed by the caller with free().
 *  If you pass -1 for src_len, strlen() will be used.
 *
 *  NOTE(review): src_len is a size_t, so "-1" arrives as (size_t)-1 and
 *  is narrowed to int by the implementation -- confirm this is intended.
 */


bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
                          const char * src_charset);

/*
    Copyright (C) 2006 Burkhard Plaum <[EMAIL PROTECTED]>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301 USA.
*/
/* UTF-8 support */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#ifdef HAVE_STRING_H
# include <string.h>
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#ifdef HAVE_ICONV
# include <iconv.h>
#endif

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

#include <cdio/utf8.h>

#include <stdio.h>


/* Concrete definition of the opaque converter type: it wraps a single
   iconv conversion descriptor.
   NOTE(review): <iconv.h> is only included when HAVE_ICONV is defined,
   but iconv_t is used here unconditionally -- confirm that builds
   without iconv are not expected to compile this file. */
struct cdio_charset_coverter_s
  {
  iconv_t ic; /* descriptor stored from iconv_open() */
  };

/*
  Allocate a converter that translates text from src_charset into
  dst_charset.

  Returns the new converter, or NULL if memory could not be allocated
  or if iconv_open() does not support the requested conversion.  A
  non-NULL result must be released with
  cdio_charset_converter_destroy().
*/
cdio_charset_coverter_t *
cdio_charset_converter_create(const char * src_charset,
                              const char * dst_charset)
  {
  cdio_charset_coverter_t * ret;

  ret = calloc(1, sizeof(*ret));
  if(!ret)
    return NULL;

  /* iconv_open() takes the destination first, then the source */
  ret->ic = iconv_open(dst_charset, src_charset);
  if(ret->ic == (iconv_t)-1)
    {
    /* Don't hand out a converter wrapping an invalid descriptor */
    free(ret);
    return NULL;
    }
  return ret;
  }

#if 0
/*
  Debugging aid: dump len bytes starting at data to stderr.

  Each output row shows up to linebreak bytes as two-digit hex values,
  pads a short last row with blanks so the columns line up, and then
  repeats the row as text, printing '.' for every byte that is below 32
  or has the high bit set.
*/
static void bgav_hexdump(uint8_t * data, int len, int linebreak)
  {
  int offset;
  int col;
  int row_len;

  for(offset = 0; offset < len; offset += row_len)
    {
    /* The last row may be shorter than linebreak */
    if(offset + linebreak > len)
      row_len = len - offset;
    else
      row_len = linebreak;

    /* Hex column */
    for(col = 0; col < row_len; col++)
      fprintf(stderr, "%02x ", data[offset + col]);

    /* Padding for a short row */
    for(col = row_len; col < linebreak; col++)
      fprintf(stderr, "   ");

    /* Text column */
    for(col = 0; col < row_len; col++)
      {
      if((data[offset + col] & 0x80) || (data[offset + col] < 32))
        fprintf(stderr, ".");
      else
        fprintf(stderr, "%c", data[offset + col]);
      }

    fprintf(stderr, "\n");
    }
  }
#endif

/*
  Release a converter created by cdio_charset_converter_create().

  Passing NULL is allowed and does nothing, mirroring free().  The
  conversion descriptor is only closed if it is valid, so a converter
  whose iconv_open() failed can still be destroyed safely.
*/
void cdio_charset_converter_destroy(cdio_charset_coverter_t*cnv)
  {
  if(!cnv)
    return;

  if(cnv->ic != (iconv_t)-1)
    iconv_close(cnv->ic);
  free(cnv);
  }

/* Number of bytes the output buffer is over-allocated by initially and
   grown by each time iconv() reports E2BIG. */
#define BYTES_INCREMENT 16

/*
  Convert src_len bytes starting at src using the conversion descriptor
  cd and return the result in a freshly malloc()ed buffer.

  cd       descriptor obtained from iconv_open()
  src      source bytes
  src_len  number of source bytes; if negative, strlen(src) is used
  dst      receives the converted, '\0'-terminated string on success
           and NULL on failure; the caller frees it with free()
  dst_len  if non-NULL, receives the number of converted bytes (not
           counting the terminating '\0') on success

  Returns true on success.  Returns false if cd is invalid, if memory
  runs out, or if iconv() fails with anything other than E2BIG (in
  which case the error is also reported on stderr).
*/
static bool
do_convert(iconv_t cd, char * src, int src_len,
           char ** dst, int *dst_len)
  {
  char * ret;
  char * grown;
  char *inbuf;
  char *outbuf;
  size_t alloc_size;
  size_t output_pos;
  size_t inbytesleft;
  size_t outbytesleft;

  /* Never leave a stale pointer behind on failure */
  *dst = NULL;

  /* iconv_open() failed in the caller */
  if(cd == (iconv_t)-1)
    return false;

  if(src_len < 0)
    src_len = (int)strlen(src);

  /* size_t arithmetic: src_len + BYTES_INCREMENT cannot overflow */
  alloc_size = (size_t)src_len + BYTES_INCREMENT;

  inbytesleft  = (size_t)src_len;

  /* We reserve space here to add a final '\0' */
  outbytesleft = alloc_size-1;

  ret    = malloc(alloc_size);
  if(!ret)
    return false;

  inbuf  = src;
  outbuf = ret;

  /* Put the descriptor into its initial state, so that a reused
     converter is not affected by an earlier, aborted conversion */
  iconv(cd, NULL, NULL, NULL, NULL);

  while(1)
    {

    if(iconv(cd, &inbuf, &inbytesleft,
             &outbuf, &outbytesleft) == (size_t)-1)
      {
      switch(errno)
        {
        case E2BIG:
          /* Output buffer full: grow it and continue where we stopped.
             outbuf must be recomputed because realloc() may move the
             buffer. */
          output_pos = (size_t)(outbuf - ret);

          alloc_size   += BYTES_INCREMENT;
          outbytesleft += BYTES_INCREMENT;

          grown = realloc(ret, alloc_size);
          if(!grown)
            {
            /* realloc() left the old buffer intact; release it */
            free(ret);
            return false;
            }
          ret = grown;
          outbuf = ret + output_pos;
          break;
        default:
          fprintf(stderr, "Iconv failed: %s\n", strerror(errno));
          free(ret);
          return false;
        }
      }
    if(!inbytesleft)
      break;
    }
  /* Zero terminate (one byte was reserved for this above) */
  *outbuf = '\0';

  /* Set return values */
  *dst = ret;
  if(dst_len)
    *dst_len = (int)(outbuf - ret);
  return true;
  }

/*
  Convert src (src_len bytes, or strlen(src) if src_len is negative)
  with the converter cnv.  On success *dst receives a malloc()ed,
  '\0'-terminated string that the caller must free(), and *dst_len (if
  dst_len is non-NULL) its length in bytes.

  Returns false without converting anything if cnv is NULL; *dst is set
  to NULL in that case.
*/
bool cdio_charset_convert(cdio_charset_coverter_t*cnv,
                          char * src, int src_len,
                          char ** dst, int * dst_len)
  {
  if(!cnv)
    {
    if(dst)
      *dst = NULL;
    return false;
    }
  return do_convert(cnv->ic, src, src_len, dst, dst_len);
  }



/*
  Convert the '\0'-terminated UTF-8 string src into dst_charset.

  On success *dst receives a malloc()ed, '\0'-terminated string that
  the caller must free(), and *dst_len (if dst_len is non-NULL) its
  length in bytes.  On failure *dst is NULL and false is returned; this
  includes the case that iconv_open() does not know dst_charset.
*/
bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
                            int * dst_len, const char * dst_charset)
  {
  iconv_t ic;
  bool result;

  /* Guarantee a defined result on every failure path */
  *dst = NULL;

  ic = iconv_open(dst_charset, "UTF-8");
  if(ic == (iconv_t)-1)
    return false;

  result = do_convert(ic, src, -1, dst, dst_len);
  iconv_close(ic);
  return result;
  }




/*
  Convert src_len bytes at src from src_charset to UTF-8.  If src_len
  is (size_t)-1, strlen(src) is used instead.

  On success *dst receives a malloc()ed, '\0'-terminated UTF-8 string
  that the caller must free().  On failure *dst is NULL and false is
  returned; this includes the case that iconv_open() does not know
  src_charset.
*/
bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
                          const char * src_charset)
  {
  iconv_t ic;
  bool result;
  int i_len;

  /* Guarantee a defined result on every failure path */
  *dst = NULL;

  ic = iconv_open("UTF-8", src_charset);
  if(ic == (iconv_t)-1)
    return false;

  /* Map the documented "-1" explicitly instead of relying on the
     implementation-defined narrowing of (size_t)-1 to int */
  i_len = (src_len == (size_t)-1) ? -1 : (int)src_len;

  result = do_convert(ic, src, i_len, dst, NULL);
  iconv_close(ic);
  return result;
  }

_______________________________________________
Libcdio-devel mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/libcdio-devel

Reply via email to