Fridrich Strba wrote:
Even the ugliest code that has some desired functionality is worth
showing. IMHO, technical discussions around existing, though
imperfect, code are really useful for one's growth ;-) And I know what I
am talking about when I speak about imperfect code from my own hacking
experience :-)
Then here's the proposed ugly code. So far, not much to look at. This
code was just an experiment.
Andrew
/**
* Copyright (C) 2006 Andrew Ziem.
* Copyright (C) 2004, 2005 William Lachance ([EMAIL PROTECTED])
* (some parts copied from libwpd 0.8.6)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <inttypes.h>
#include <errno.h>
#include <iconv.h>
#include <gsf/gsf-utils.h>
#include <gsf/gsf-input-stdio.h>
#include <gsf/gsf-infile.h>
#include <gsf/gsf-infile-msole.h>
//fixme
// Byte-order helpers: assemble an unsigned integer from the bytes at p without
// assuming any alignment. LE = least significant byte first, BE = most
// significant byte first. Every expansion is fully parenthesised so it can be
// used inside larger expressions.
#define WPD_LE_GET_GUINT8(p) (*(uint8_t const *)(p))
#define WPD_LE_GET_GUINT16(p) \
    ((uint16_t)((((uint8_t const *)(p))[0] << 0) | \
                (((uint8_t const *)(p))[1] << 8)))
// The 32-bit variants widen each byte to uint32_t *before* shifting. Shifting
// the promoted (signed) int by 24 would push bytes >= 0x80 into the sign bit.
#define WPD_LE_GET_GUINT32(p) \
    ((uint32_t)(((uint32_t)((uint8_t const *)(p))[0] << 0) | \
                ((uint32_t)((uint8_t const *)(p))[1] << 8) | \
                ((uint32_t)((uint8_t const *)(p))[2] << 16) | \
                ((uint32_t)((uint8_t const *)(p))[3] << 24)))
#define WPD_BE_GET_GUINT8(p) (*(uint8_t const *)(p))
#define WPD_BE_GET_GUINT16(p) \
    ((uint16_t)((((uint8_t const *)(p))[1] << 0) | \
                (((uint8_t const *)(p))[0] << 8)))
#define WPD_BE_GET_GUINT32(p) \
    ((uint32_t)(((uint32_t)((uint8_t const *)(p))[3] << 0) | \
                ((uint32_t)((uint8_t const *)(p))[2] << 8) | \
                ((uint32_t)((uint8_t const *)(p))[1] << 16) | \
                ((uint32_t)((uint8_t const *)(p))[0] << 24)))
// Deletes m and resets it to NULL. Expands to a bare `if` statement, so only
// use it as a complete statement of its own (never as the body of an if/else).
#define DELETEP(m) if (m) { delete m; m = NULL; }
// Values returned by GSFInputStream::getWorksVersion().
#define WPS_VERSION_UNKNOWN 0
#define WPS_VERSION_4 1
#define WPS_VERSION_2000 2
#define WPS_VERSION_8 3
// Offset, within the CONTENTS stream, at which the format magic is read.
#define WPS8_HEADER_MAGIC_OFFSET 0
/**
 * Empty tag type used as an exception. It carries no message or error code.
 * TODO: flesh this class out a bit.
 */
class FileException {};
/**
 * C++ wrapper around a libgsf GsfInput.
 *
 * The wrapper takes its own reference on the GsfInput in the constructor and
 * drops it in the destructor. The OLE2 directory view (m_ole) is only opened
 * when a member function first needs it.
 */
class GSFInputStream
{
public:
    /** Wraps input and takes an additional reference on it. */
    GSFInputStream(GsfInput *input);
    /** Releases the OLE view (if one was opened) and the input reference. */
    virtual ~GSFInputStream();

    /**
     * Reads numBytes bytes via gsf_input_read().
     * Returns the pointer libgsf hands back; numBytesRead is set to numBytes
     * on success and to 0 when NULL is returned.
     */
    const uint8_t *read(size_t numBytes, size_t &numBytesRead);
    /** Forwards to gsf_input_seek() and returns its result. */
    int seek(long offset, GSeekType seekType);
    /** Forwards to gsf_input_tell(). */
    const long tell();
    /** Forwards to gsf_input_eof(). */
    bool atEOS();

    /** Reads numBytes bytes into a std::string; throws FileException on a short read. */
    std::string readCPPString(size_t numBytes);
    /**
     * Reads numBytes bytes into a malloc()ed buffer of numBytes + 2 bytes.
     * The caller owns the buffer and must free() it. Returns NULL when the
     * allocation fails.
     */
    char * readCString(size_t numBytes);

    /** True when the OLE2 container has a child stream with the given name. */
    bool hasChildByName(char const *name);
    /** Guesses the Works version (one of WPS_VERSION_*) and remembers it. */
    int getWorksVersion();
    /**
     * Opens the main document stream for the version found by
     * getWorksVersion(). Returns a new stream the caller must delete, or NULL.
     */
    GSFInputStream * getDocumentOLEStream();

    // Fixed-width readers; each throws FileException when the read fails.
    int8_t read8();
    uint8_t readU8();
    uint16_t readU16(bool bigendian=false);
    uint32_t readU32(bool bigendian=false);

private:
    // Not copyable: the destructor unrefs m_input and m_ole, so a member-wise
    // copy would release each reference twice. Declared but never defined.
    GSFInputStream(const GSFInputStream &);
    GSFInputStream &operator=(const GSFInputStream &);

    GsfInput *m_input;   // wrapped input; one reference is held by this object
    GsfInfile *m_ole;    // OLE2 view of m_input, NULL until first needed
    int wversion;        // last result of getWorksVersion()
};
/**
 * Wraps input. No OLE view is opened yet and the Works version starts out as
 * unknown. An extra reference is taken on input; the destructor releases it.
 */
GSFInputStream::GSFInputStream(GsfInput *input) :
    m_input(input),
    m_ole(NULL),
    wversion(WPS_VERSION_UNKNOWN)
{
    g_object_ref(G_OBJECT(input));
}
/**
 * Drops the reference on the OLE view when one was opened, then the reference
 * on the wrapped input taken by the constructor.
 */
GSFInputStream::~GSFInputStream()
{
    if (NULL != m_ole)
    {
        g_object_unref(G_OBJECT(m_ole));
    }
    g_object_unref(G_OBJECT(m_input));
}
/**
 * Reads numBytes bytes from the wrapped input.
 *
 * @param numBytes      number of bytes requested
 * @param numBytesRead  set to numBytes when data is returned, 0 otherwise
 * @return the pointer returned by gsf_input_read(), or NULL on failure
 */
const uint8_t * GSFInputStream::read(size_t numBytes, size_t &numBytesRead)
{
    const uint8_t *data = gsf_input_read(m_input, numBytes, NULL);
    numBytesRead = (NULL != data) ? numBytes : 0;
    return data;
}
/**
 * Moves the read position of the wrapped input.
 * Returns whatever gsf_input_seek() returns.
 * NOTE(review): libgsf documents that value as TRUE on *error* -- confirm
 * before testing the result.
 */
int GSFInputStream::seek(long offset, GSeekType seekType)
{
return gsf_input_seek(m_input, offset, seekType);
}
/**
 * Returns the value of gsf_input_tell() for the wrapped input, converted to
 * long.
 */
const long GSFInputStream::tell()
{
return gsf_input_tell(m_input);
}
/**
 * Returns the result of gsf_input_eof() for the wrapped input, as a bool.
 */
bool GSFInputStream::atEOS()
{
return gsf_input_eof(m_input);
}
bool GSFInputStream::hasChildByName(char const *name)
{
if (!m_ole)
m_ole = GSF_INFILE(gsf_infile_msole_new (m_input, NULL));
if (m_ole)
{
GsfInput *g = gsf_infile_child_by_name (m_ole, name);
if (g)
{
g_object_unref(G_OBJECT (g));
return true;
}
}
return false;
}
/**
 * Debug helper: prints the name of every child of infile to stdout, one per
 * line. Does nothing when infile is NULL.
 */
void gsf_dump_children_by_name(GsfInfile *infile)
{
    if (NULL == infile)
        return;

    const int count = gsf_infile_num_children(infile);
    int index = 0;
    while (index < count)
    {
        GsfInput *child = gsf_infile_child_by_index(infile, index);
        if (NULL == child)
        {
            printf("error getting child %i\n", index);
        }
        else
        {
            printf("child: %s\n", gsf_input_name(child));
            g_object_unref(G_OBJECT (child));
        }
        ++index;
    }
}
/**
 * Guesses which Works version wrote the file.
 *
 * The guess combines which OLE child streams exist with the 7-byte magic at
 * the start of the CONTENTS stream. The result is stored in wversion (it is
 * used later by getDocumentOLEStream()) and returned.
 *
 * @return WPS_VERSION_4, WPS_VERSION_2000, WPS_VERSION_8 or WPS_VERSION_UNKNOWN
 * @throws FileException when reading the magic bytes fails
 */
int GSFInputStream::getWorksVersion()
{
    //fixme: a lot of guessing right now
    const char CompObj[] = {0x01,'C','o','m','p','O','b','j',0};
    const bool hasCompObj = hasChildByName(CompObj);
    const bool hasMM = hasChildByName("MM");
    const bool hasMNO = hasChildByName("MN0");
    const bool hasMatOST = hasChildByName("MatOST");
    const bool hasCONTENTS = hasChildByName("CONTENTS");
    const bool hasSPELLING = hasChildByName("SPELLING");
    bool hasWorks8Magic = false, hasWorks2000Magic = false;

    if (m_ole)
    {
        if (hasCONTENTS)
        {
            GsfInput * document = gsf_infile_child_by_name(m_ole, "CONTENTS");
            if (document)
            {
                // Zero-filled so a stream shorter than 7 bytes still yields a
                // terminated string with no uninitialised bytes.
                char fileMagic[8];
                memset(fileMagic, 0, sizeof(fileMagic));
                {
                    // Stack object: destroyed (and its reference released)
                    // even when readU8() throws.
                    GSFInputStream documentStream(document);
                    g_object_unref(G_OBJECT (document));
                    /* check the Works 2000, 8 format magics */
                    documentStream.seek(WPS8_HEADER_MAGIC_OFFSET, G_SEEK_SET);
                    for (int i=0; i<7 && !documentStream.atEOS(); i++)
                        fileMagic[i] = documentStream.readU8();
                }
                // works8 have CHNKWKS -- maybe works7
                hasWorks8Magic = (0 == strcmp(fileMagic, "CHNKWKS"));
                hasWorks2000Magic = (0 == strcmp(fileMagic, "CHNKINK"));
                //todo: debug message when neither magic matches
            }
        }

        int num_children = gsf_infile_num_children(m_ole);
        // some old Works has same as v4 and {0x05} "SummaryInformation" and {0x05} DocumentSummaryInformation
        // old Works, maybe v4, has CompObj, MM, MNO
        // Works v8, maybe also 7, has CompObj, CONTENTS, SPELLING
        //todo: examine CompObj for version info
        // comparing first 68 bytes of many CompObj
        // cat ?CompObj | tr -d "\n" | cut -b 1-68 | md5sum -b
        // most have a55af8a258d4a125569a58b126c38e9f
        if (num_children > 3)
        {
            printf("num_children=%i\n", num_children);
            gsf_dump_children_by_name(m_ole);
        }

        if (!hasWorks2000Magic && !hasWorks8Magic && hasMM && hasMNO && hasMatOST && !hasCONTENTS && !hasSPELLING)
        {
            wversion = WPS_VERSION_4;
            return wversion;
        }
        if (hasWorks2000Magic && !hasWorks8Magic && !hasMM && !hasMNO && !hasMatOST && hasCONTENTS)
        {
            wversion = WPS_VERSION_2000;
            return wversion;
        }
        if (!hasWorks2000Magic && hasWorks8Magic && hasCompObj && !hasMM && !hasMNO && !hasMatOST && hasCONTENTS && hasSPELLING && 3==num_children)
        {
            wversion = WPS_VERSION_8;
            return wversion;
        }

        // Unrecognised small container: list its children to help debugging.
        if (num_children <= 3)
        {
            gsf_dump_children_by_name(m_ole);
        }
    }

    wversion = WPS_VERSION_UNKNOWN;
    return wversion;
}
/**
 * Opens the OLE child stream that holds the document for the version stored
 * in wversion: "MN0" for WPS_VERSION_4, "CONTENTS" for WPS_VERSION_2000 and
 * WPS_VERSION_8.
 *
 * @return a new GSFInputStream the caller must delete, or NULL when the OLE
 *         view cannot be opened, the version is any other value, or the
 *         child stream is missing
 */
GSFInputStream * GSFInputStream::getDocumentOLEStream()
{
    if (NULL == m_ole)
        m_ole = GSF_INFILE(gsf_infile_msole_new (m_input, NULL));
    if (NULL == m_ole)
        return NULL;

    const char *streamName = NULL;
    if (WPS_VERSION_4 == wversion)
        streamName = "MN0";
    else if (WPS_VERSION_2000 == wversion || WPS_VERSION_8 == wversion)
        streamName = "CONTENTS";
    if (NULL == streamName)
        return NULL;

    GsfInput *child = gsf_infile_child_by_name(m_ole, streamName);
    if (NULL == child)
        return NULL;

    // The wrapper takes its own reference, so ours can be dropped.
    GSFInputStream *wrapped = new GSFInputStream(child);
    g_object_unref(G_OBJECT (child));
    return wrapped;
}
/**
 * Reads one unsigned byte.
 * @throws FileException when the byte cannot be read
 */
uint8_t GSFInputStream::readU8()
{
    size_t got;
    const uint8_t *byte = read(sizeof(uint8_t), got);
    if (NULL == byte || sizeof(uint8_t) != got)
    {
        throw FileException();
    }
    return WPD_LE_GET_GUINT8(byte);
}
/**
 * Reads one signed byte.
 * @throws FileException when the byte cannot be read
 */
int8_t GSFInputStream::read8()
{
    size_t got;
    const uint8_t *raw = read(sizeof(int8_t), got);
    if (NULL == raw || sizeof(int8_t) != got)
    {
        throw FileException();
    }
    // Reinterpret the stored byte as signed, as the file format intends.
    const int8_t *asSigned = (int8_t const *) raw;
    return *asSigned;
}
/**
 * Reads a 16-bit unsigned integer.
 * @param bigendian  true: most significant byte first; false (default): least
 *                   significant byte first
 * @throws FileException when two bytes cannot be read
 */
uint16_t GSFInputStream::readU16(bool bigendian)
{
    size_t got;
    const uint8_t *bytes = read(sizeof(uint16_t), got);
    if (NULL == bytes || sizeof(uint16_t) != got)
    {
        throw FileException();
    }
    return bigendian ? WPD_BE_GET_GUINT16(bytes) : WPD_LE_GET_GUINT16(bytes);
}
/**
 * Reads a 32-bit unsigned integer.
 * @param bigendian  true: most significant byte first; false (default): least
 *                   significant byte first
 * @throws FileException when four bytes cannot be read
 */
uint32_t GSFInputStream::readU32(bool bigendian)
{
    size_t got;
    const uint8_t *bytes = read(sizeof(uint32_t), got);
    if (NULL == bytes || sizeof(uint32_t) != got)
    {
        throw FileException();
    }
    return bigendian ? WPD_BE_GET_GUINT32(bytes) : WPD_LE_GET_GUINT32(bytes);
}
/**
 * Reads numBytes bytes, one at a time, and returns them as a std::string.
 * @throws FileException when any byte cannot be read
 */
std::string GSFInputStream::readCPPString(size_t numBytes)
{
    std::string text;
    for (size_t remaining = numBytes; remaining != 0; --remaining)
    {
        text.push_back((char)readU8());
    }
    return text;
}
/**
 * Reads numBytes bytes into a freshly malloc()ed buffer.
 *
 * The buffer is numBytes + 2 bytes long and the two trailing bytes are set to
 * zero, so the result is terminated whether it is treated as a char string or
 * as 16-bit code units.
 *
 * @param numBytes  number of bytes to read
 * @return the buffer (the caller must free() it), or NULL when it cannot be
 *         allocated
 * @throws FileException when a byte cannot be read; the buffer is freed first
 */
char * GSFInputStream::readCString(size_t numBytes)
{
    // numBytes + 2 must not wrap around, or the buffer would be too small.
    if (numBytes > (size_t)-1 - 2)
    {
        errno = ENOMEM;
        perror("malloc");
        return NULL;
    }
    char * s = (char *)malloc(numBytes + 2);
    if (NULL == s)
    {
        perror("malloc");
        return NULL;
    }
    try
    {
        for (size_t i = 0; i < numBytes; i++)
        {
            s[i] = (char)readU8();
        }
    }
    catch (...)
    {
        // Do not leak the partially filled buffer on a short read.
        free(s);
        throw;
    }
    s[numBytes] = '\0';
    s[numBytes + 1] = '\0';
    return s;
}
/**
 * Experimental dump of the text of a Works 8 CONTENTS stream.
 *
 * All offsets below are guesses made by inspecting sample files. The text is
 * assumed to be UTF-16LE starting at offset 0x200; it is converted to UTF-8,
 * carriage returns are turned into line feeds, and the result is printed to
 * stdout.
 *
 * @param document  the CONTENTS stream
 * @throws FileException when the stream is shorter than the offsets expect
 */
static void dump_wps8(GSFInputStream *document)
{
    // version number not at 21
    document->seek(21, G_SEEK_SET);
    int v = document->readU8();
    printf("vers ? = %xh (%i)\n", v, v);

    // find first TEXT
    document->seek(34 - document->tell(), G_SEEK_CUR);
    std::string header = document->readCPPString(4);
    printf("header = %s\n", header.c_str());
    // + 6 bytes data
    document->seek(6, G_SEEK_CUR);
    // find next TEXT
    header = document->readCPPString(4);
    printf("header = %s\n", header.c_str());
    document->seek(4, G_SEEK_CUR);

    // The stored length is 2 larger than the number of bytes dumped. Check the
    // raw value before subtracting so a stored 0 or 1 cannot wrap around.
    const uint32_t stored_length = document->readU32();
    if (stored_length < 4)
    {
        printf("no text!\n");
        return;
    }
    size_t text_length = stored_length - 2;
    // The output buffer below is text_length * 2 + 1 bytes; refuse lengths
    // for which that computation would overflow size_t.
    if (text_length > ((size_t)-1 - 1) / 2)
    {
        fprintf(stderr, "text too long\n");
        return;
    }
    printf("text length = %lu (%lxh)\n", (unsigned long)text_length, (unsigned long)text_length);

    // read the text contents
    document->seek(0x200, G_SEEK_SET);
    char * text = document->readCString(text_length);
    if (NULL == text)
        return; // readCString() has already reported the failure
    // Replace the final 16-bit unit with U+0000 (behaviour kept from the
    // original experiment): it converts to a NUL that ends the UTF-8 output.
    text[text_length-2]=text[text_length-1]=0;

    iconv_t cd = iconv_open("UTF-8", "UTF-16LE"); //guessing
    if ((iconv_t)-1 == cd)
    {
        free(text);
        g_error("iconv_open() failed\n");
        return;
    }

    // One UTF-16 code unit (2 bytes) becomes at most 3 bytes of UTF-8, so
    // twice the input size is always enough.
    size_t outbytesleft = text_length * 2;
    printf("outbytesleft starts = %lu\n", (unsigned long)outbytesleft);
    char *outbuffer = (char *)malloc(outbytesleft+1);
    if (NULL == outbuffer)
    {
        perror("malloc");
        iconv_close(cd);
        free(text);
        return;
    }

    // iconv() advances the pointers it is given, so remember the originals.
    char *source = text;
    char *result = outbuffer;
    size_t rc = iconv(cd, &text, &text_length, &outbuffer, &outbytesleft);
    const int iconv_errno = errno; // save before any call can overwrite it
    iconv_close(cd);
    if ((size_t)-1 == rc)
    {
        free(source);
        free(result);
        g_error("iconv() failed, errno=%i\n", iconv_errno);
        return;
    }
    // Terminate explicitly; one spare byte was allocated for this.
    *outbuffer = '\0';

    // change end of line character
    for (char *p = result; *p != '\0'; ++p)
    {
        if (0x0D == *p)
            *p = 0x0A;
    }
    printf("result = %s\n", result);

    free(source);
    free(result);
}
/**
 * Experimental dump of the text of a Works 4 document stream.
 *
 * Reads a 32-bit length at offset 0x26, subtracts 256 + 4 from it, and prints
 * that many bytes starting at offset 0x102 as a plain C string.
 *
 * @param document  the document stream
 * @throws FileException when the stream is shorter than the offsets expect
 */
static void dump_wps4(GSFInputStream *document)
{
    // get text length
    // works4: offset for text_length is at 0x26
    // checked lengths 1, 2, FF, FFFF
    document->seek(0x26, G_SEEK_SET);
    const uint32_t stored_length = document->readU32();
    const uint32_t overhead = 256 + 4;
    // Check before subtracting: a smaller stored value would wrap around to a
    // huge unsigned length.
    if (stored_length < overhead)
    {
        printf("no text!\n");
        return;
    }
    const size_t text_length = stored_length - overhead;
    printf("text_length = %lu\n", (unsigned long)text_length);

    // read actual text
    // works4: offset for text start is 0x102
    document->seek(0x102, G_SEEK_SET);
    char * text = document->readCString(text_length);
    if (text)
    {
        // readCString() allocates text_length + 2 bytes, so this index is in
        // bounds. Terminate before handing the buffer to %s.
        text[text_length] = '\0';
        printf("text = %s\n", text);
        free(text);
    }
}
/**
 * Detects the Works version of istream, opens the matching document stream
 * and hands it to the version-specific dumper. Works 2000 files are only
 * identified; nothing is dumped for them.
 *
 * @param istream  stream wrapping the whole OLE2 file
 * @throws FileException passed on from the readers; the document stream is
 *         released before the exception leaves this function
 */
static void
dump_wps (GSFInputStream *istream)
{
    int wversion = istream->getWorksVersion();
    if (WPS_VERSION_UNKNOWN == wversion)
    {
        printf("Unknown Works version\n");
        return;
    }

    GSFInputStream *document = istream->getDocumentOLEStream();
    if (!document)
    {
        g_error ("Input stream failed");
        return;
    }

    try
    {
        switch (wversion)
        {
        case WPS_VERSION_4:
            printf("Works version 4 format\n");
            dump_wps4(document);
            break;
        case WPS_VERSION_2000:
            printf("Works version 2000 (v5) format\n");
            break;
        case WPS_VERSION_8:
            printf("Works version 8 (Suite 2005) format\n");
            dump_wps8(document);
            break;
        }
    }
    catch (...)
    {
        // The dumpers throw on short reads; do not leak the stream.
        delete document;
        throw;
    }
    delete document;
}
int main(int argc, char **argv)
{
GsfInput *input;
GError *err = NULL;
char *fn;
if (argc < 2)
{
g_error("put filename on command line");
}
fn = argv[1];
gsf_init ();
printf("\ndebug: opening %s\n", fn);
input = gsf_input_stdio_new(fn, &err);
if (NULL == input)
{
g_return_val_if_fail (err != NULL, 1);
g_warning ("'%s' error: %s", fn, err->message);
g_error_free (err);
return 1;
}
GSFInputStream istream(input);
dump_wps(&istream);
// infile = gsf_infile_msole_new (input, &err);
// g_object_unref (G_OBJECT (input));
gsf_shutdown ();
return 0;
}
# Builds the wps_test experiment against glib and libgsf.
# NOTE(review): `pkg-config --cflags --libs libgsf-1` would presumably replace
# the hard-coded -I/-L/-l flags below -- confirm on the target system.
all: wps_test
wps_test: wps_test.cpp
	g++ -o wps_test wps_test.cpp `pkg-config glib-2.0 --cflags` \
		-I/usr/include/libgsf-1 -L/usr/lib -g -Wall -lgsf-1
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Libwpd-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/libwpd-devel