Hello folks,
Found out that the code of convert() in EncodingConversionExample.c wasn’t so
clear after all...
Fixing that with the code below gives:
07:09:48 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash
--> Compiling EncodingConversionExample.c...
16 -rwxr-xr-x. 1 menu menu 13243 Dec 1 07:10 EncodingConversionExample
--> Running EncodingConversionExample:
--> encoding = ISO-8859-1
--> content = élàö
--> size = 8
--> out_size = 15
--> temp = 7
--> ret = 13
--> temp = 7
--> out_size = 13
conversion was successful. converted: 7 octets.
--> out = élà ö
<?xml version="1.0" encoding="ISO-8859-1"?>
<root>élàö</root>
With ISO-8859-7, though:
07:10:05 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash
--> Compiling EncodingConversionExample.c...
16 -rwxr-xr-x. 1 menu menu 13243 Dec 1 07:10 EncodingConversionExample
--> Running EncodingConversionExample:
--> encoding = ISO-8859-7
--> content = élàö
--> size = 8
--> out_size = 15
--> temp = 7
./DoEncodingConversionExample.bash: line 28: 14446 Segmentation fault
(core dumped) ./${EXECUTABLE} "élàö"
//-----------------------------
#include <string.h>
#include <libxml/parser.h>
unsigned char*
convert (unsigned char *in, char *encoding)
{
unsigned char *out;
int ret,size,out_size,temp;
xmlCharEncodingHandlerPtr handler;
size = (int)strlen(in)+1;
out_size = size*2-1;
out = malloc((size_t)out_size);
printf("--> size = %d\n", size);
printf("--> out_size = %d\n", out_size);
if (out) {
handler = xmlFindCharEncodingHandler(encoding);
if (!handler) {
printf("--> %s encoding handler not found\n", encoding);
free(out);
out = NULL;
}
}
if (out) {
temp=size-1;
printf("--> temp = %d\n", temp);
/**
* xmlCharEncodingInputFunc:
* @out: a pointer to an array of bytes to store the UTF-8 result
* @outlen: the length of @out
* @in: a pointer to an array of chars in the original encoding
* @inlen: the length of @in
*
* Take a block of chars in the original encoding and try to convert
* it to an UTF-8 block of chars out.
*
* Returns the number of bytes written, -1 if lack of space, or -2
* if the transcoding failed.
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictiable.
* The value of @outlen after return is the number of octets consumed.
*/
ret = handler->input(out, &out_size, in, &temp);
if (ret == -1) {
printf("Conversion couldn't be done, lack of space.\n");
} else {
if (ret == -2) {
printf("Conversion couldn't be done, transcoding failed.\n");
} else {
if (ret == 0) {
printf("--> temp = %d\n", temp);
printf("--> temp = %d\n", temp);
printf("--> out_size = %d\n", out_size);
printf("conversion wasn't successful. converted: %i octets.\n",temp);
free(out);
out = NULL;
} else {
// ret > 0
printf("--> ret = %d\n", ret);
printf("--> temp = %d\n", temp);
printf("--> out_size = %d\n", out_size);
printf("conversion was successful. converted: %i octets.\n",temp);
out = realloc(out,out_size+1);
out[out_size]=0; /*null terminating out*/
}}}
} else {
printf("no mem\n");
}
return (out);
}
/**
 * main:
 * @argc: number of command-line arguments
 * @argv: argv[1] is the text to convert; optional argv[2] names its
 *        encoding (defaults to "ISO-8859-7")
 *
 * Converts the text to UTF-8 with convert(), wraps it in a <root> element
 * and writes the document to stdout serialized in the chosen encoding.
 *
 * Returns EXIT_SUCCESS when the document was written, EXIT_FAILURE on a
 * usage error, a failed conversion, or a libxml2 failure.
 */
int
main(int argc, char **argv) {
    char *encoding;
    unsigned char *content;
    unsigned char *out = NULL;
    xmlDocPtr doc = NULL;
    xmlNodePtr rootnode;
    int status = EXIT_FAILURE;

    if (argc <= 1) {
        printf("Usage: %s content [encoding]\n",
               (argc > 0 && argv[0] != NULL) ? argv[0]
                                             : "EncodingConversionExample");
        return (EXIT_FAILURE);
    }

    encoding = (argc > 2) ? argv[2] : "ISO-8859-7";
    printf("--> encoding = %s\n", encoding);

    content = (unsigned char *)argv[1];
    printf("--> content = %s\n", content);

    out = convert(content, encoding);
    if (out == NULL) {
        /* Passing NULL to %s is undefined, so report and stop here. */
        printf("--> conversion failed, no document written\n");
        goto done;
    }
    printf("--> out = %s\n", out);

    doc = xmlNewDoc((const xmlChar*)"1.0");
    if (doc == NULL) {
        printf("no mem\n");
        goto done;
    }

    /*
     * The raw variant stores the text verbatim; xmlNewDocNode() would
     * interpret '&' in the user's text as the start of an entity reference.
     */
    rootnode = xmlNewDocRawNode(doc, NULL, (const xmlChar*)"root", out);
    if (rootnode == NULL) {
        printf("no mem\n");
        goto done;
    }
    xmlDocSetRootElement(doc, rootnode);

    if (xmlSaveFormatFileEnc("-", doc, encoding, 1) < 0) {
        printf("--> could not write document in %s\n", encoding);
        goto done;
    }
    status = EXIT_SUCCESS;

done:
    if (doc != NULL) {
        xmlFreeDoc(doc);
    }
    free(out);
    xmlCleanupParser();
    return (status);
}
//-----------------------------
> Le 30 nov. 2015 à 23:31, Menu Jacques <[email protected]> a écrit :
>
> Hello,
>
> Same problem on CentOS 7.1.
>
> My environment contains:
>
> 23:25:30 (690) menu@ - ~ > echo $LC_TYPE
> iso_8859_1
> 23:25:48 (691) menu@ - ~ > echo $LANG
> C
> 23:26:00 (692) menu@ - ~ > echo GDM_LANG
> GDM_LANG
>
> and I get:
>
> 23:27:02 (695) menu@ - ~/examples_libxml2 > ./EncodingConversionExample foo
> --> content = foo
> --> size = 4
> --> out_size = 7
> --> temp = 3
> --> temp-size+1 = 0
> conversion wasn't successful.
> --> out = (null)
> <?xml version="1.0" encoding="ISO-8859-1"?>
> <root/>
> 23:27:20 (696) menu@ - ~/examples_libxml2 > ./EncodingConversionExample éöîà
> --> content = éöîà
> --> size = 9
> --> out_size = 17
> --> temp = 8
> --> temp-size+1 = 0
> conversion wasn't successful.
> --> out = (null)
> <?xml version="1.0" encoding="ISO-8859-1"?>
> <root/>
>
> I’m clearly doing something wrong, but what?
>
> JM
>
>> Le 25 nov. 2015 à 13:08, Menu Jacques <[email protected]> a écrit :
>>
>> I use:
>>
>> 13:07:34 (254) menu@ - ~/libxml2-git > apt list libxml2 libxml2-dev
>> Listing... Done
>> libxml2/stable,now 2.9.1+dfsg1-5 i386 [installed,automatic]
>> libxml2-dev/stable,now 2.9.1+dfsg1-5 i386 [installed]
>>
>> on Debian 8 32bit (jessie)
>>
>> JM
>>
>>> Le 25 nov. 2015 à 11:17, Menu Jacques <[email protected]> a écrit :
>>>
>>> Hello folks,
>>>
>>> I’ve successfully built the examples from
>>> http://xmlsoft.org/tutorial/index.html, except the last one about encoding
>>> conversion.
>>>
>>> I added printouts to the original code to help (see below) and get:
>>>
>>> 10:51:35 (250) menu@ - ~/libxml2-git > ./EncodingConversionExample foo
>>> --> content = foo
>>> --> size = 4
>>> --> out_size = 7
>>> --> temp = 3
>>> --> temp-size+1 = 0
>>> conversion wasn't successful.
>>> --> out = (null)
>>> <?xml version="1.0" encoding="ISO-8859-1"?>
>>> <root/>
>>>
>>> Thanks for your help!
>>>
>>> JM
>>>
>>> —
>>>
>>> 11:15:24 (251) menu@ - ~/libxml2-git > cat EncodingConversionExample.c
>>> #include <string.h>
>>> #include <libxml/parser.h>
>>>
>>>
>>> unsigned char*
>>> convert (unsigned char *in, char *encoding)
>>> {
>>> unsigned char *out;
>>> int ret,size,out_size,temp;
>>> xmlCharEncodingHandlerPtr handler;
>>>
>>> size = (int)strlen(in)+1;
>>> out_size = size*2-1;
>>> out = malloc((size_t)out_size);
>>>
>>> printf("--> size = %d\n", size);
>>> printf("--> out_size = %d\n", out_size);
>>>
>>> if (out) {
>>> handler = xmlFindCharEncodingHandler(encoding);
>>>
>>> if (!handler) {
>>> printf("--> %s encoding handler not found\n",
>>> encoding);
>>> free(out);
>>> out = NULL;
>>> }
>>> }
>>>
>>> if (out) {
>>> temp=size-1;
>>> printf("--> temp = %d\n", temp);
>>>
>>> ret = handler->input(out, &out_size, in, &temp);
>>>
>>> if (ret || temp-size+1) {
>>> printf("--> temp-size+1 = %d\n", temp-size+1);
>>>
>>> if (ret) {
>>> printf("conversion wasn't successful.\n");
>>> } else {
>>> printf("conversion wasn't successful.
>>> converted: %i octets.\n",temp);
>>> }
>>>
>>> free(out);
>>> out = NULL;
>>> } else {
>>> out = realloc(out,out_size+1);
>>> out[out_size]=0; /*null terminating out*/
>>>
>>> }
>>> } else {
>>> printf("no mem\n");
>>> }
>>> return (out);
>>> }
>>>
>>>
>>> int
>>> main(int argc, char **argv) {
>>>
>>> unsigned char *content, *out;
>>> xmlDocPtr doc;
>>> xmlNodePtr rootnode;
>>> char *encoding = "ISO-8859-1";
>>>
>>>
>>> if (argc <= 1) {
>>> printf("Usage: %s content\n", argv[0]);
>>> return(0);
>>> }
>>>
>>> content = argv[1];
>>> printf("--> content = %s\n", content);
>>>
>>> out = convert(content, encoding);
>>> printf("--> out = %s\n", out);
>>>
>>> doc = xmlNewDoc ("1.0");
>>> rootnode = xmlNewDocNode(doc, NULL, (const xmlChar*)"root", out);
>>> xmlDocSetRootElement(doc, rootnode);
>>>
>>> xmlSaveFormatFileEnc("-", doc, encoding, 1);
>>> return (1);
>>> }
>>>
>>
>
_______________________________________________
xml mailing list, project page http://xmlsoft.org/
[email protected]
https://mail.gnome.org/mailman/listinfo/xml