Hi all,
attached is a patch (plus 2 sourcefiles), which adds utf-8 support and
a better (i.e. more generic) charset conversion:
- Define a type cdio_utf8_t (== char), which denotes UTF-8 strings.
- Routines for converting character sets: Either using a character
set converter (which can be used for multiple strings) or via the
functions cdio_charset_[from|to]_utf8(). These routines catch the
E2BIG error and reallocate the returned string, so you'll never need
to know the maximum length of the destination in advance.
- Removed all occurrences of iconv from iso9660_fs.c and replaced them
by cdio_charset_to_utf8().
The files utf8.h and utf8.c belong to include/cdio/ and lib/driver/
respectively.
Testing this can be done with iso-info and a Joliet CDROM.
If there are no major objections, I'll commit this by the weekend.
Cheers
Burkhard
Index: include/cdio/Makefile.am
===================================================================
RCS file: /sources/libcdio/libcdio/include/cdio/Makefile.am,v
retrieving revision 1.30
diff -u -r1.30 Makefile.am
--- include/cdio/Makefile.am 27 Feb 2006 10:10:08 -0000 1.30
+++ include/cdio/Makefile.am 17 May 2006 19:36:03 -0000
@@ -50,6 +50,7 @@
udf.h \
udf_file.h \
udf_time.h \
+ utf8.h \
util.h \
version.h \
xa.h
Index: include/cdio/iso9660.h
===================================================================
RCS file: /sources/libcdio/libcdio/include/cdio/iso9660.h,v
retrieving revision 1.93
diff -u -r1.93 iso9660.h
--- include/cdio/iso9660.h 6 May 2006 16:08:06 -0000 1.93
+++ include/cdio/iso9660.h 17 May 2006 19:36:06 -0000
@@ -916,7 +916,7 @@
is some problem in getting this and false is returned.
*/
bool iso9660_ifs_get_application_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_app_id);
+ /*out*/ cdio_utf8_t **p_psz_app_id);
/*!
Return the Joliet level recognized for p_iso.
@@ -954,7 +954,7 @@
is some problem in getting this and false is returned.
*/
bool iso9660_ifs_get_preparer_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_preparer_id);
+ /*out*/ cdio_utf8_t **p_psz_preparer_id);
/*!
Return a string containing the PVD's publisher id with trailing
@@ -967,7 +967,7 @@
is some problem in getting this and false is returned.
*/
bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_publisher_id);
+ /*out*/ cdio_utf8_t **p_psz_publisher_id);
uint8_t iso9660_get_pvd_type(const iso9660_pvd_t *p_pvd);
@@ -993,7 +993,7 @@
is some problem in getting this and false is returned.
*/
bool iso9660_ifs_get_system_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_system_id);
+ /*out*/ cdio_utf8_t **p_psz_system_id);
/*! Return the LSN of the root directory for pvd.
@@ -1012,7 +1012,7 @@
is some problem in getting this and false is returned.
*/
bool iso9660_ifs_get_volume_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_volume_id);
+ /*out*/ cdio_utf8_t **p_psz_volume_id);
/*!
Return the volumeset ID in the PVD.
@@ -1025,7 +1025,7 @@
is some problem in getting this and false is returned.
*/
bool iso9660_ifs_get_volumeset_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_volumeset_id);
+ /*out*/ cdio_utf8_t **p_psz_volumeset_id);
/* pathtable */
Index: include/cdio/types.h
===================================================================
RCS file: /sources/libcdio/libcdio/include/cdio/types.h,v
retrieving revision 1.35
diff -u -r1.35 types.h
--- include/cdio/types.h 23 Jan 2006 20:47:33 -0000 1.35
+++ include/cdio/types.h 17 May 2006 19:36:07 -0000
@@ -202,7 +202,15 @@
typedef struct msf_s msf_t;
#define msf_t_SIZEOF 3
-
+
+ /*!
+ \brief UTF-8 char definition
+
+ Type to denote UTF-8 strings.
+ */
+
+ typedef char cdio_utf8_t;
+
typedef enum {
nope = 0,
yep = 1,
Index: lib/driver/Makefile.am
===================================================================
RCS file: /sources/libcdio/libcdio/lib/driver/Makefile.am,v
retrieving revision 1.15
diff -u -r1.15 Makefile.am
--- lib/driver/Makefile.am 14 Mar 2006 12:05:16 -0000 1.15
+++ lib/driver/Makefile.am 17 May 2006 19:36:07 -0000
@@ -94,6 +94,7 @@
sector.c \
solaris.c \
track.c \
+ utf8.c \
util.c
lib_LTLIBRARIES = libcdio.la
Index: lib/driver/libcdio.sym
===================================================================
RCS file: /sources/libcdio/libcdio/lib/driver/libcdio.sym,v
retrieving revision 1.35
diff -u -r1.35 libcdio.sym
--- lib/driver/libcdio.sym 15 Apr 2006 03:05:14 -0000 1.35
+++ lib/driver/libcdio.sym 17 May 2006 19:36:07 -0000
@@ -205,3 +205,8 @@
mmc_start_stop_media
mmc_timeout_ms
track_format2str
+cdio_charset_converter_create
+cdio_charset_converter_destroy
+cdio_charset_convert
+cdio_charset_from_utf8
+cdio_charset_to_utf8
Index: lib/iso9660/iso9660_fs.c
===================================================================
RCS file: /sources/libcdio/libcdio/lib/iso9660/iso9660_fs.c,v
retrieving revision 1.35
diff -u -r1.35 iso9660_fs.c
--- lib/iso9660/iso9660_fs.c 17 Mar 2006 22:36:31 -0000 1.35
+++ lib/iso9660/iso9660_fs.c 17 May 2006 19:36:10 -0000
@@ -33,10 +33,6 @@
# include <errno.h>
#endif
-#ifdef HAVE_ICONV
-# include <iconv.h>
-#endif
-
#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif
@@ -45,6 +41,7 @@
#include <cdio/bytesex.h>
#include <cdio/iso9660.h>
#include <cdio/util.h>
+#include <cdio/utf8.h>
/* Private headers */
#include "cdio_assert.h"
@@ -277,71 +274,13 @@
return true;
}
-#ifdef HAVE_JOLIET
-static bool
-ucs2be_to_locale(ICONV_CONST char *psz_ucs2be, size_t i_inlen,
- char **p_psz_out, size_t i_outlen)
-{
-
- iconv_t ic =
-#if defined(HAVE_LANGINFO_CODESET)
- iconv_open(nl_langinfo(CODESET), "UCS-2BE");
-#else
- iconv_open("ASCII", "UCS-2BE");
-#endif
-
- int rc;
- char *psz_buf = NULL;
- char *psz_buf2;
- int i_outlen_max = i_outlen;
- int i_outlen_actual;
-
- if (-1 == (size_t) ic) {
-#if defined(HAVE_LANGINFO_CODESET)
- cdio_info("Failed to get conversion table for locale, trying ASCII");
- ic = iconv_open("ASCII", "UCS-2BE");
- if (-1 == (size_t) ic) {
- cdio_info("Failed to get conversion table for ASCII too");
- return false;
- }
-#else
- cdio_info("Failed to get conversion table for locale");
- return false;
-#endif
- }
-
- psz_buf = (char *) realloc(psz_buf, i_outlen);
- psz_buf2 = psz_buf;
- if (!psz_buf) {
- /* XXX: report out of memory error */
- goto error;
- }
- rc = iconv(ic, &psz_ucs2be, &i_inlen, &psz_buf2, &i_outlen);
- iconv_close(ic);
- if ((rc == -1) && (errno != E2BIG)) {
- /* conversion failed */
- goto error;
- }
- i_outlen_actual = i_outlen_max - i_outlen;
- *p_psz_out = malloc(i_outlen_actual + 1);
- memcpy(*p_psz_out, psz_buf, i_outlen_actual);
- *(*p_psz_out + i_outlen_actual) = '\0';
- free(psz_buf);
- return true;
- error:
- free(psz_buf);
- *p_psz_out = NULL;
- return false;
-}
-#endif /*HAVE_JOLIET*/
-
/*!
Return the application ID. NULL is returned in psz_app_id if there
is some problem in getting this.
*/
bool
iso9660_ifs_get_application_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_app_id)
+ /*out*/ cdio_utf8_t **p_psz_app_id)
{
if (!p_iso) {
*p_psz_app_id = NULL;
@@ -355,10 +294,9 @@
longer results *and* have the same character using
the PVD, do that.
*/
- if ( ucs2be_to_locale(p_iso->svd.application_id,
- ISO_MAX_APPLICATION_ID,
- p_psz_app_id,
- ISO_MAX_APPLICATION_ID))
+ if ( cdio_charset_to_utf8(p_iso->svd.application_id,
+ ISO_MAX_APPLICATION_ID,
+ p_psz_app_id, "UCS-2BE"))
return true;
}
#endif /*HAVE_JOLIET*/
@@ -381,7 +319,7 @@
*/
bool
iso9660_ifs_get_preparer_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_preparer_id)
+ /*out*/ cdio_utf8_t **p_psz_preparer_id)
{
if (!p_iso) {
*p_psz_preparer_id = NULL;
@@ -395,8 +333,8 @@
longer results *and* have the same character using
the PVD, do that.
*/
- if ( ucs2be_to_locale(p_iso->svd.preparer_id, ISO_MAX_PREPARER_ID,
- p_psz_preparer_id, ISO_MAX_PREPARER_ID) )
+ if ( cdio_charset_to_utf8(p_iso->svd.preparer_id, ISO_MAX_PREPARER_ID,
+ p_psz_preparer_id, "UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
@@ -409,7 +347,7 @@
blanks removed.
*/
bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_publisher_id)
+ /*out*/ cdio_utf8_t **p_psz_publisher_id)
{
if (!p_iso) {
*p_psz_publisher_id = NULL;
@@ -423,8 +361,8 @@
longer results *and* have the same character using
the PVD, do that.
*/
- if( ucs2be_to_locale(p_iso->svd.publisher_id, ISO_MAX_PUBLISHER_ID,
- p_psz_publisher_id, ISO_MAX_PUBLISHER_ID) )
+ if( cdio_charset_to_utf8(p_iso->svd.publisher_id, ISO_MAX_PUBLISHER_ID,
+ p_psz_publisher_id, "UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
@@ -438,7 +376,7 @@
blanks removed.
*/
bool iso9660_ifs_get_system_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_system_id)
+ /*out*/ cdio_utf8_t **p_psz_system_id)
{
if (!p_iso) {
*p_psz_system_id = NULL;
@@ -452,8 +390,8 @@
longer results *and* have the same character using
the PVD, do that.
*/
- if ( ucs2be_to_locale(p_iso->svd.system_id, ISO_MAX_SYSTEM_ID,
- p_psz_system_id, ISO_MAX_SYSTEM_ID) )
+ if ( cdio_charset_to_utf8(p_iso->svd.system_id, ISO_MAX_SYSTEM_ID,
+ p_psz_system_id, "UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
@@ -467,7 +405,7 @@
blanks removed.
*/
bool iso9660_ifs_get_volume_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_volume_id)
+ /*out*/ cdio_utf8_t **p_psz_volume_id)
{
if (!p_iso) {
*p_psz_volume_id = NULL;
@@ -481,8 +419,8 @@
longer results *and* have the same character using
the PVD, do that.
*/
- if ( ucs2be_to_locale(p_iso->svd.volume_id, ISO_MAX_VOLUME_ID,
- p_psz_volume_id, ISO_MAX_VOLUME_ID) )
+ if ( cdio_charset_to_utf8(p_iso->svd.volume_id, ISO_MAX_VOLUME_ID,
+ p_psz_volume_id, "UCS-2BE") )
return true;
}
#endif /* HAVE_JOLIET */
@@ -496,7 +434,7 @@
blanks removed.
*/
bool iso9660_ifs_get_volumeset_id(iso9660_t *p_iso,
- /*out*/ char **p_psz_volumeset_id)
+ /*out*/ cdio_utf8_t **p_psz_volumeset_id)
{
if (!p_iso) {
*p_psz_volumeset_id = NULL;
@@ -510,10 +448,10 @@
longer results *and* have the same character using
the PVD, do that.
*/
- if ( ucs2be_to_locale(p_iso->svd.volume_set_id,
- ISO_MAX_VOLUMESET_ID,
- p_psz_volumeset_id,
- ISO_MAX_VOLUMESET_ID) )
+ if ( cdio_charset_to_utf8(p_iso->svd.volume_set_id,
+ ISO_MAX_VOLUMESET_ID,
+ p_psz_volumeset_id,
+ "UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
@@ -843,10 +781,10 @@
#ifdef HAVE_JOLIET
else if (i_joliet_level) {
int i_inlen = i_fname;
- int i_outlen = (i_inlen / 2);
- char *p_psz_out = NULL;
- ucs2be_to_locale(p_iso9660_dir->filename, i_inlen, &p_psz_out,
- i_outlen);
+ cdio_utf8_t *p_psz_out = NULL;
+ cdio_charset_to_utf8(p_iso9660_dir->filename, i_inlen,
+ &p_psz_out, "UCS-2BE");
+
strncpy(p_stat->filename, p_psz_out, i_fname);
free(p_psz_out);
}
/*
Copyright (C) 2006 Burkhard Plaum <[EMAIL PROTECTED]>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
*/
/* UTF-8 support */
#include <cdio/types.h>
/** \brief Opaque characterset converter
*/
typedef struct cdio_charset_coverter_s cdio_charset_coverter_t;
/** \brief Create a charset converter
* \param src_charset Source charset
* \param dst_charset Destination charset
* \returns A newly allocated charset converter
*/
cdio_charset_coverter_t *
cdio_charset_converter_create(const char * src_charset,
const char * dst_charset);
/** \brief Destroy a characterset converter
* \param cnv A characterset converter
*/
void cdio_charset_converter_destroy(cdio_charset_coverter_t*cnv);
/** \brief Convert a string from one character set to another
* \param cnv A charset converter
* \param src Source string
* \param src_len Length of source string
* \param dst Returns destination string
* \param dst_len If non NULL, returns the length of the destination string
* \returns true if conversion was successful, false otherwise.
*
* The destination string must be freed by the caller with free().
* If you pass -1 for src_len, strlen() will be used.
*/
bool cdio_charset_convert(cdio_charset_coverter_t*cnv,
char * src, int src_len,
char ** dst, int * dst_len);
/** \brief Convert a string from UTF-8 to another charset
* \param src Source string (0 terminated)
* \param dst Returns destination string
* \param dst_len If non NULL, returns the length of the destination string
* \param dst_charset The characterset to convert to
* \returns true if conversion was successful, false otherwise.
*
* This is a convenience function, which creates a charset converter,
* converts one string and destroys the charset converter.
*/
bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
int * dst_len, const char * dst_charset);
/** \brief Convert a string from another charset to UTF-8
* \param src Source string
* \param src_len Length of the source string
* \param dst Returns destination string (0 terminated)
* \param src_charset The characterset to convert from
* \returns true if conversion was successful, false otherwise.
*
* This is a convenience function, which creates a charset converter,
* converts one string and destroys the charset converter. If you pass -1
* for src_len, strlen() will be used.
*/
bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
const char * src_charset);
/*
Copyright (C) 2006 Burkhard Plaum <[EMAIL PROTECTED]>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
*/
/* UTF-8 support */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_ICONV
# include <iconv.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#include <cdio/utf8.h>
#include <limits.h>
#include <stdio.h>
/*
 * Concrete layout behind the opaque cdio_charset_coverter_t handle:
 * a thin wrapper around a single iconv conversion descriptor.
 *
 * NOTE(review): iconv_t is used unconditionally here, although
 * <iconv.h> is only included when HAVE_ICONV is defined -- confirm that
 * this file is only built when iconv is available.
 */
struct cdio_charset_coverter_s
{
iconv_t ic; /* descriptor obtained from iconv_open() */
};
/*
 * Create a charset converter that converts from src_charset to
 * dst_charset.
 *
 * Returns a newly allocated converter, to be released with
 * cdio_charset_converter_destroy(), or NULL if the converter object
 * itself could not be allocated.
 *
 * The result of iconv_open() is stored without being inspected: when the
 * requested conversion is not available the converter is still returned,
 * with its descriptor set to (iconv_t)-1.
 */
cdio_charset_coverter_t *
cdio_charset_converter_create(const char * src_charset,
                              const char * dst_charset)
{
  cdio_charset_coverter_t * ret;

  ret = calloc(1, sizeof(*ret));
  if(!ret)
    return NULL;

  /* Note the argument order: iconv_open() takes the destination first. */
  ret->ic = iconv_open(dst_charset, src_charset);
  return ret;
}
#if 0
/*
 * Debugging aid, currently compiled out.
 *
 * Dumps len bytes of data to stderr, linebreak bytes per row. Each row
 * shows the bytes as two-digit hex values followed by a text column in
 * which bytes >= 32 with the high bit clear are printed as characters
 * and every other byte is printed as '.'.
 */
static void bgav_hexdump(uint8_t * data, int len, int linebreak)
{
int i;
int bytes_written = 0;
int imax;
while(bytes_written < len)
{
/* Number of bytes in this row: a full row, or whatever is left. */
imax = (bytes_written + linebreak > len) ? len - bytes_written : linebreak;
for(i = 0; i < imax; i++)
fprintf(stderr, "%02x ", data[bytes_written + i]);
/* Pad a short final row so the text column lines up.
   NOTE(review): each hex cell above is three characters wide but the
   padding is a single space -- the literal may have lost whitespace;
   confirm against the original file. */
for(i = imax; i < linebreak; i++)
fprintf(stderr, " ");
for(i = 0; i < imax; i++)
{
if(!(data[bytes_written + i] & 0x80) && (data[bytes_written + i] >= 32))
fprintf(stderr, "%c", data[bytes_written + i]);
else
fprintf(stderr, ".");
}
bytes_written += imax;
fprintf(stderr, "\n");
}
}
#endif
/*
 * Destroy a charset converter: close its iconv descriptor and free the
 * converter object.
 *
 * Passing NULL is a no-op, mirroring free(). A descriptor equal to
 * (iconv_t)-1 (the iconv_open() failure value) is not passed to
 * iconv_close().
 */
void cdio_charset_converter_destroy(cdio_charset_coverter_t*cnv)
{
  if(!cnv)
    return;

  if(cnv->ic != (iconv_t)-1)
    iconv_close(cnv->ic);
  free(cnv);
}
/* Initial slack of the output buffer and the step by which it grows. */
#define BYTES_INCREMENT 16

/*
 * Convert a byte string with an already opened iconv descriptor.
 *
 * cd       conversion descriptor from iconv_open()
 * src      source bytes
 * src_len  number of source bytes; if negative, strlen(src) is used
 * dst      receives a newly allocated buffer holding the converted bytes
 *          followed by a single '\0' byte; the caller releases it with
 *          free()
 * dst_len  if non-NULL, receives the number of converted bytes, not
 *          counting the terminating '\0'
 *
 * Returns true on success. On any failure (NULL src or dst, invalid
 * descriptor, out of memory, or an iconv() error other than E2BIG) it
 * returns false, sets *dst to NULL and *dst_len to 0, and leaves nothing
 * for the caller to free.
 *
 * The output buffer starts at src_len + BYTES_INCREMENT bytes and is
 * enlarged by BYTES_INCREMENT each time iconv() reports E2BIG, so the
 * caller never needs to know the converted length in advance.
 */
static bool
do_convert(iconv_t cd, char * src, int src_len,
           char ** dst, int *dst_len)
{
  char * ret;
  char * grown;
  char * inbuf;
  char * outbuf;
  size_t alloc_size;
  size_t output_pos;
  size_t inbytesleft;
  size_t outbytesleft;

  if(!dst)
    return false;

  /* Give the caller well-defined outputs on every failure path. */
  *dst = NULL;
  if(dst_len)
    *dst_len = 0;

  if(!src || cd == (iconv_t)-1)
    return false;

  if(src_len < 0)
    src_len = (int)strlen(src);

  alloc_size = (size_t)src_len + BYTES_INCREMENT;
  inbytesleft = (size_t)src_len;
  /* One byte is always held back for the final '\0'. */
  outbytesleft = alloc_size - 1;

  ret = malloc(alloc_size);
  if(!ret)
    return false;

  inbuf = src;
  outbuf = ret;

  while(inbytesleft)
    {
    if(iconv(cd, &inbuf, &inbytesleft,
             &outbuf, &outbytesleft) != (size_t)-1)
      continue;

    if(errno != E2BIG)
      {
      fprintf(stderr, "Iconv failed: %s\n", strerror(errno));
      free(ret);
      return false;
      }

    /* Output buffer full: grow it and carry on where iconv() stopped.
       realloc() may move the block, so outbuf is rebuilt from its
       offset; the old block stays valid if realloc() fails. */
    output_pos = (size_t)(outbuf - ret);
    grown = realloc(ret, alloc_size + BYTES_INCREMENT);
    if(!grown)
      {
      free(ret);
      return false;
      }
    ret = grown;
    alloc_size += BYTES_INCREMENT;
    outbytesleft += BYTES_INCREMENT;
    outbuf = ret + output_pos;
    }

  /* Zero terminate (space for this byte was reserved above) */
  *outbuf = '\0';

  /* Set return values */
  *dst = ret;
  if(dst_len)
    *dst_len = (int)(outbuf - ret);
  return true;
}
/*
 * Convert src (src_len bytes, or strlen(src) if src_len is negative)
 * with the converter cnv.
 *
 * The actual work is delegated to do_convert() using the converter's
 * iconv descriptor. On success *dst holds a newly allocated string that
 * the caller must free() and, if dst_len is non-NULL, *dst_len its
 * length.
 *
 * Returns false without converting anything when cnv is NULL; in that
 * case *dst is set to NULL if dst is non-NULL.
 */
bool cdio_charset_convert(cdio_charset_coverter_t*cnv,
                          char * src, int src_len,
                          char ** dst, int * dst_len)
{
  if(!cnv)
    {
    if(dst)
      *dst = NULL;
    return false;
    }
  return do_convert(cnv->ic, src, src_len, dst, dst_len);
}
/*
 * Convert the zero-terminated UTF-8 string src to dst_charset.
 *
 * Opens a temporary iconv descriptor, converts the whole string with
 * do_convert() (length taken via strlen) and closes the descriptor
 * again. On success *dst holds a newly allocated string that the caller
 * must free() and, if dst_len is non-NULL, *dst_len its length.
 *
 * Returns false if src or dst is NULL, if no conversion from UTF-8 to
 * dst_charset can be opened, or if the conversion itself fails. *dst is
 * NULL whenever false is returned (and dst is non-NULL).
 */
bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
                            int * dst_len, const char * dst_charset)
{
  iconv_t ic;
  bool result;

  if(!dst)
    return false;
  *dst = NULL;
  if(!src)
    return false;

  ic = iconv_open(dst_charset, "UTF-8");
  /* Never hand the failure value to iconv() or iconv_close(). */
  if(ic == (iconv_t)-1)
    return false;

  result = do_convert(ic, src, -1, dst, dst_len);
  iconv_close(ic);
  if(!result)
    *dst = NULL;
  return result;
}
/*
 * Convert src_len bytes of src from src_charset to UTF-8.
 *
 * Opens a temporary iconv descriptor, converts with do_convert() and
 * closes the descriptor again. On success *dst holds a newly allocated,
 * zero-terminated UTF-8 string that the caller must free().
 *
 * src_len == (size_t)-1 requests that strlen(src) be used, as described
 * in the header. Any other length that does not fit into the int taken
 * by do_convert() is rejected instead of being silently truncated.
 *
 * Returns false if src or dst is NULL, if src_len is out of range, if no
 * conversion from src_charset to UTF-8 can be opened, or if the
 * conversion itself fails. *dst is NULL whenever false is returned (and
 * dst is non-NULL).
 */
bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
                          const char * src_charset)
{
  iconv_t ic;
  bool result;
  int len;

  if(!dst)
    return false;
  *dst = NULL;
  if(!src)
    return false;

  if(src_len == (size_t)-1)
    len = -1;
  else if(src_len > (size_t)INT_MAX)
    return false;
  else
    len = (int)src_len;

  ic = iconv_open("UTF-8", src_charset);
  /* Never hand the failure value to iconv() or iconv_close(). */
  if(ic == (iconv_t)-1)
    return false;

  result = do_convert(ic, src, len, dst, NULL);
  iconv_close(ic);
  if(!result)
    *dst = NULL;
  return result;
}
_______________________________________________
Libcdio-devel mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/libcdio-devel