Author: grothoff Date: 2006-03-08 18:26:01 -0800 (Wed, 08 Mar 2006) New Revision: 2467
Modified: Extractor/src/include/extractor.h Extractor/src/main/extractor.c Extractor/src/plugins/ole2/ole2extractor.c Extractor/src/plugins/wordleaker/wordextractor.cc Extractor/src/plugins/wordleaker/wordleaker.cpp Extractor/src/plugins/wordleaker/wordleaker.h Log: more wordleaker hacking Modified: Extractor/src/include/extractor.h =================================================================== --- Extractor/src/include/extractor.h 2006-03-08 13:52:16 UTC (rev 2466) +++ Extractor/src/include/extractor.h 2006-03-09 02:26:01 UTC (rev 2467) @@ -140,8 +140,17 @@ EXTRACTOR_ORIENTATION = 87, EXTRACTOR_TEMPLATE = 88, EXTRACTOR_SPLIT = 89, - - EXTRACTOR_PRODUCTVERSION = 90, + EXTRACTOR_PRODUCTVERSION = 90, + EXTRACTOR_LAST_SAVED_BY = 91, + EXTRACTOR_LAST_PRINTED = 92, + EXTRACTOR_WORD_COUNT = 93, + EXTRACTOR_CHARACTER_COUNT = 94, + EXTRACTOR_TOTAL_EDITING_TIME = 95, + EXTRACTOR_THUMBNAILS = 96, + EXTRACTOR_SECURITY = 97, + EXTRACTOR_CREATED_BY_SOFTWARE = 98, + EXTRACTOR_MODIFIED_BY_SOFTWARE = 99, + EXTRACTOR_REVISION_HISTORY = 100, } EXTRACTOR_KeywordType; /** Modified: Extractor/src/main/extractor.c =================================================================== --- Extractor/src/main/extractor.c 2006-03-08 13:52:16 UTC (rev 2466) +++ Extractor/src/main/extractor.c 2006-03-09 02:26:01 UTC (rev 2467) @@ -132,11 +132,21 @@ gettext_noop("template"), gettext_noop("split"), gettext_noop("product version"), + gettext_noop("last saved by"), + gettext_noop("last printed"), + gettext_noop("word count"), + gettext_noop("character count"), + gettext_noop("total editing time"), + gettext_noop("thumbnails"), + gettext_noop("security"), + gettext_noop("created by software"), + gettext_noop("modified by software"), + gettext_noop("revision history"), NULL, }; /* the number of keyword types (for bounds-checking) */ -#define HIGHEST_TYPE_NUMBER 91 +#define HIGHEST_TYPE_NUMBER 101 #ifdef HAVE_LIBOGG #if HAVE_VORBIS Modified: Extractor/src/plugins/ole2/ole2extractor.c =================================================================== --- Extractor/src/plugins/ole2/ole2extractor.c 2006-03-08 13:52:16 UTC (rev 2466) +++ Extractor/src/plugins/ole2/ole2extractor.c 2006-03-09 02:26:01 UTC (rev 2467) @@ -1627,7 +1627,7 @@ g_warning ("error: %s", error->message); g_error_free (error); } else { - g_warning ("unknown error converting string property, using blank"); + // g_warning ("unknown error converting string property, using blank"); } *data += 4 + len * section->char_size; break; Modified: Extractor/src/plugins/wordleaker/wordextractor.cc =================================================================== --- Extractor/src/plugins/wordleaker/wordextractor.cc 2006-03-08 13:52:16 UTC (rev 2466) +++ Extractor/src/plugins/wordleaker/wordextractor.cc 2006-03-09 02:26:01 UTC (rev 2467) @@ -30,8 +30,41 @@ #include "wordleaker.h" #include "pole.h" + + +#include <iostream> +#include <fstream> +#include <stdlib.h> +#include <list> +#include <ctime> + + extern "C" { + static EXTRACTOR_KeywordType + SummaryProperties[] = { + EXTRACTOR_UNKNOWN, + EXTRACTOR_UNKNOWN, + EXTRACTOR_TITLE, + EXTRACTOR_SUBJECT, + EXTRACTOR_AUTHOR, + EXTRACTOR_KEYWORDS, + EXTRACTOR_COMMENT, + EXTRACTOR_TEMPLATE, + EXTRACTOR_LAST_SAVED_BY, + EXTRACTOR_VERSIONNUMBER, + EXTRACTOR_TOTAL_EDITING_TIME, + EXTRACTOR_LAST_PRINTED, + EXTRACTOR_CREATION_DATE, + EXTRACTOR_MODIFICATION_DATE, + EXTRACTOR_PAGE_COUNT, + EXTRACTOR_WORD_COUNT, + EXTRACTOR_CHARACTER_COUNT, + EXTRACTOR_THUMBNAILS, + EXTRACTOR_SOFTWARE, + EXTRACTOR_SECURITY, + }; + static struct EXTRACTOR_Keywords * addKeyword(EXTRACTOR_KeywordType type, const char * keyword, struct EXTRACTOR_Keywords * next) { @@ -46,9 +79,151 @@ return result; } + static char * dateToString( unsigned long date ) { + char f[16]; + sprintf(f, "%d/%d/%d", (date / 10000 % 100), (date / 100 % 100), (date % 100)); + return strdup(f); + } + + static const char * idToProduct( unsigned int id ) { + // TODO: find the rest of ids + switch ( id ) { + case 0x6A62: + return "Word 97"; + case 0x626A: + return "Word 98 (Mac)"; + default: + return "Unknown"; + } + } + + static const char * lidToLanguage( unsigned int lid ) { + switch ( lid ) { + case 0x0400: + return _("No Proofing"); + case 0x0401: + return _("Arabic"); + case 0x0402: + return _("Bulgarian"); + case 0x0403: + return _("Catalan"); + case 0x0404: + return _("Traditional Chinese"); + case 0x0804: + return _("Simplified Chinese"); + case 0x0405: + return _("Czech"); + case 0x0406: + return _("Danish"); + case 0x0407: + return _("German"); + case 0x0807: + return _("Swiss German"); + case 0x0408: + return _("Greek"); + case 0x0409: + return _("U.S. English"); + case 0x0809: + return _("U.K. English"); + case 0x0c09: + return _("Australian English"); + case 0x040a: + return _("Castilian Spanish"); + case 0x080a: + return _("Mexican Spanish"); + case 0x040b: + return _("Finnish"); + case 0x040c: + return _("French"); + case 0x080c: + return _("Belgian French"); + case 0x0c0c: + return _("Canadian French"); + case 0x100c: + return _("Swiss French"); + case 0x040d: + return _("Hebrew"); + case 0x040e: + return _("Hungarian"); + case 0x040f: + return _("Icelandic"); + case 0x0410: + return _("Italian"); + case 0x0810: + return _("Swiss Italian"); + case 0x0411: + return _("Japanese"); + case 0x0412: + return _("Korean"); + case 0x0413: + return _("Dutch"); + case 0x0813: + return _("Belgian Dutch"); + case 0x0414: + return _("Norwegian - Bokmal"); + case 0x0814: + return _("Norwegian - Nynorsk"); + case 0x0415: + return _("Polish"); + case 0x0416: + return _("Brazilian Portuguese"); + case 0x0816: + return _("Portuguese"); + case 0x0417: + return _("Rhaeto-Romanic"); + case 0x0418: + return _("Romanian"); + case 0x0419: + return _("Russian"); + case 0x041a: + return _("Croato-Serbian (Latin)"); + case 0x081a: + return _("Serbo-Croatian (Cyrillic)"); + case 0x041b: + return _("Slovak"); + case 0x041c: + return _("Albanian"); + case 0x041d: + return _("Swedish"); + case 0x041e: + return _("Thai"); + case 0x041f: + return _("Turkish"); + case 0x0420: + return _("Urdu"); + case 0x0421: + return _("Bahasa"); + case 0x0422: + return _("Ukrainian"); + case 0x0423: + return _("Byelorussian"); + case 0x0424: + return _("Slovenian"); + case 0x0425: + return _("Estonian"); + case 0x0426: + return _("Latvian"); + case 0x0427: + return _("Lithuanian"); + case 0x0429: + return _("Farsi"); + case 0x042D: + return _("Basque"); + case 0x042F: + return _("Macedonian"); + case 0x0436: + return _("Afrikaans"); + case 0x043E: + return _("Malaysian"); + default: + return _("Unknown"); + } + } + + // read the type of the property and displays its value - char * getProperty( POLE::Stream* stream ) { + static char * getProperty( POLE::Stream* stream ) { unsigned long read, type; unsigned char buffer[256]; unsigned char c; @@ -88,6 +263,8 @@ j = 0; while ( ((c = stream->getch()) != 0) && (i > j) ) s[j++] = c; + if ( (j > 0) && (s[j-1] == '\n') ) + s[--j] = '\0'; if (j != i) { free(s); return NULL; @@ -98,7 +275,9 @@ t1 = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); t2 = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); t = filetime_to_unixtime(t1, t2); - return ctime_r((time_t *) &t, (char*)malloc(32)); + char * ret = ctime_r((time_t *) &t, (char*)malloc(32)); + ret[strlen(ret)-1] = '\0'; /* kill newline */ + return ret; } return NULL; } @@ -109,6 +288,7 @@ size_t size, struct EXTRACTOR_Keywords * prev) { char ver[16]; + char product[128]; if (size < 512 + 898) return prev; const unsigned char * buffer = (const unsigned char*) &data[512]; @@ -130,10 +310,23 @@ prev = addKeyword(EXTRACTOR_LANGUAGE, lidToLanguage(lid), prev); + char * date = dateToString(lProductCreated); + snprintf(product, 128, _("%s (Build %s)"), + idToProduct(wMagicCreated), + date); + free(date); + prev = addKeyword(EXTRACTOR_CREATED_BY_SOFTWARE, + product, + prev); + date = dateToString(lProductRevised); + snprintf(product, 128, _("%s (Build %s)"), + idToProduct(wMagicRevised), + date); + free(date); + prev = addKeyword(EXTRACTOR_MODIFIED_BY_SOFTWARE, + product, + prev); - // cout << "Created by: " << idToProduct(wMagicCreated) << " (Build " << dateToString(lProductCreated) << ")" << endl; - // cout << "Revised by: " << idToProduct(wMagicRevised) << " (Build " << dateToString(lProductRevised) << ")" << endl; - POLE::Storage* storage = new POLE::Storage( filename ); storage->open(); if( storage->result() != POLE::Storage::Ok ) @@ -159,11 +352,12 @@ unsigned int propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); unsigned int offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); if (propertyID > 1 && propertyID < 20) { - // cout << SummaryProperties[propertyID] << ": "; unsigned long offsetCur = stream->tell(); stream->seek(offsetProp + begin); - // read and show the property char * prop = getProperty(stream); + prev = addKeyword(SummaryProperties[propertyID], + prop, + prev); free(prop); stream->seek(offsetCur); } @@ -173,7 +367,9 @@ unsigned int where = 0; // FIXME: should look if using 0Table or 1Table - stream = storage->stream( "1Table" ); + stream = storage->stream("1Table"); + if (! stream) + stream = storage->stream("0Table"); if (stream) { unsigned char * buffer = new unsigned char[lcbSttbSavedBy]; unsigned char buffer2[1024]; @@ -181,34 +377,40 @@ // goto offset of revision stream->seek(fcSttbSavedBy); // read all the revision history - stream->read(buffer, lcbSttbSavedBy); + if (lcbSttbSavedBy == stream->read(buffer, lcbSttbSavedBy)) { - // there are n strings, so n/2 revisions (author & file) - unsigned int nRev = (buffer[2] + (buffer[3] << 8)) / 2; - where = 6; - - for (unsigned int i=0; i < nRev; i++) { - // cout << "Rev #" << i << ": Author \""; - unsigned int length = buffer[where++]; - // it's unicode, for now we only get the low byte - for (unsigned int j=0; j < length; j++) { - where++; - // cout << buffer[where]; - where++; + // there are n strings, so n/2 revisions (author & file) + unsigned int nRev = (buffer[2] + (buffer[3] << 8)) / 2; + where = 6; + for (unsigned int i=0; i < nRev; i++) { + if (where >= lcbSttbSavedBy) + break; + unsigned int length = buffer[where++]; + if (where + 2 * length + 2 >= lcbSttbSavedBy) + break; + char * author = convertToUtf8((const char*) &buffer[where], + length * 2, + "UTF-16BE"); + where += length * 2 + 1; + length = buffer[where++]; + if (where + 2 * length >= lcbSttbSavedBy) + break; + char * filename = convertToUtf8((const char*) &buffer[where], + length * 2, + "UTF-16BE"); + where += length * 2 + 1; + char * rbuf = (char*) malloc(strlen(author) + strlen(filename) + 512); + snprintf(rbuf, 512 + strlen(author) + strlen(filename), + _("Revision #%u: Author '%s' worked on '%s'"), + i, author, filename); + free(author); + free(filename); + prev = addKeyword(EXTRACTOR_REVISION_HISTORY, + rbuf, + prev); + free(rbuf); } - where++; - // cout << "\" worked on file \""; - length = buffer[where++]; - // it's unicode, for now we only get the low byte - for (unsigned int j=0; j < length; j++) { - where++; - // cout << buffer[where]; - where++; - } - where++; - // cout << "\"" << endl; } - delete buffer; } Modified: Extractor/src/plugins/wordleaker/wordleaker.cpp =================================================================== --- Extractor/src/plugins/wordleaker/wordleaker.cpp 2006-03-08 13:52:16 UTC (rev 2466) +++ Extractor/src/plugins/wordleaker/wordleaker.cpp 2006-03-09 02:26:01 UTC (rev 2467) @@ -37,6 +37,8 @@ unsigned long fcSttbSavedBy; unsigned long lcbSttbSavedBy; + + // read the type of the property and displays its value void showProperty( POLE::Stream* stream ) { @@ -273,7 +275,6 @@ } -#if HAVE_MAIN int main(int argc, char *argv[]) { cout << endl << "WordLeaker v.0.1" << endl; cout << " by Madelman (http://elligre.tk/madelman/)" << endl << endl; @@ -308,4 +309,3 @@ return 0; } -#endif Modified: Extractor/src/plugins/wordleaker/wordleaker.h =================================================================== --- Extractor/src/plugins/wordleaker/wordleaker.h 2006-03-08 13:52:16 UTC (rev 2466) +++ Extractor/src/plugins/wordleaker/wordleaker.h 2006-03-09 02:26:01 UTC (rev 2467) @@ -27,30 +27,8 @@ using namespace std; -static char* SummaryProperties[] = { -"Unknown", -"Unknown", -"Title", -"Subject", -"Author", -"Keywords", -"Comments", -"Template", -"Last Saved By", -"Revision Number", -"Total Editing Time", -"Last Printed", -"Create Time/Date", -"Last Saved Time/Date", -"Number of Pages", -"Number of Words", -"Number of Characters", -"Thumbnails", -"Creating Application", -"Security" -}; - -static char* DocumentSummaryProperties[] = { +static char* +DocumentSummaryProperties[] = { "Dictionary", "Code page", "Category", @@ -70,147 +48,6 @@ "LinksUpTo" }; -string dateToString( unsigned long date ) { - char f[9]; - sprintf(f, "%d/%d/%d", (date / 10000 % 100), (date / 100 % 100), (date % 100)); - return f; -} - -string idToProduct( unsigned int id ) { - // TODO: find the rest of ids - switch ( id ) { - case 0x6A62: - return "Word 97"; - case 0x626A: - return "Word 98 (Mac)"; - default: - return "Unknown"; - } -} - -const char * lidToLanguage( unsigned int lid ) { - switch ( lid ) { - case 0x0400: - return "No Proofing"; - case 0x0401: - return "Arabic"; - case 0x0402: - return "Bulgarian"; - case 0x0403: - return "Catalan"; - case 0x0404: - return "Traditional Chinese"; - case 0x0804: - return "Simplified Chinese"; - case 0x0405: - return "Czech"; - case 0x0406: - return "Danish"; - case 0x0407: - return "German"; - case 0x0807: - return "Swiss German"; - case 0x0408: - return "Greek"; - case 0x0409: - return "U.S. English"; - case 0x0809: - return "U.K. English"; - case 0x0c09: - return "Australian English"; - case 0x040a: - return "Castilian Spanish"; - case 0x080a: - return "Mexican Spanish"; - case 0x040b: - return "Finnish"; - case 0x040c: - return "French"; - case 0x080c: - return "Belgian French"; - case 0x0c0c: - return "Canadian French"; - case 0x100c: - return "Swiss French"; - case 0x040d: - return "Hebrew"; - case 0x040e: - return "Hungarian"; - case 0x040f: - return "Icelandic"; - case 0x0410: - return "Italian"; - case 0x0810: - return "Swiss Italian"; - case 0x0411: - return "Japanese"; - case 0x0412: - return "Korean"; - case 0x0413: - return "Dutch"; - case 0x0813: - return "Belgian Dutch"; - case 0x0414: - return "Norwegian - Bokmal"; - case 0x0814: - return "Norwegian - Nynorsk"; - case 0x0415: - return "Polish"; - case 0x0416: - return "Brazilian Portuguese"; - case 0x0816: - return "Portuguese"; - case 0x0417: - return "Rhaeto-Romanic"; - case 0x0418: - return "Romanian"; - case 0x0419: - return "Russian"; - case 0x041a: - return "Croato-Serbian (Latin)"; - case 0x081a: - return "Serbo-Croatian (Cyrillic)"; - case 0x041b: - return "Slovak"; - case 0x041c: - return "Albanian"; - case 0x041d: - return "Swedish"; - case 0x041e: - return "Thai"; - case 0x041f: - return "Turkish"; - case 0x0420: - return "Urdu"; - case 0x0421: - return "Bahasa"; - case 0x0422: - return "Ukrainian"; - case 0x0423: - return "Byelorussian"; - case 0x0424: - return "Slovenian"; - case 0x0425: - return "Estonian"; - case 0x0426: - return "Latvian"; - case 0x0427: - return "Lithuanian"; - case 0x0429: - return "Farsi"; - case 0x042D: - return "Basque"; - case 0x042F: - return "Macedonian"; - case 0x0436: - return "Afrikaans"; - case 0x043E: - return "Malaysian"; - default: - return "Unknown"; - } -} - /* * filetime_to_unixtime * _______________________________________________ GNUnet-SVN mailing list GNUnet-SVN@gnu.org http://lists.gnu.org/mailman/listinfo/gnunet-svn