> Index: gtk/gtk_save.c
> the changes here refer to save branch; a branch to implement save_text, save_complete in gtk; save_complete needed migration to the core from riscos/amiga; the front implements functions essentially to allow saving of files [integrate the filing system]
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ gtk/gtk_save.c 2009-07-10 12:49:36.000000000 +0100
> @@ -0,0 +1,103 @@
> +/*
> + * Copyright 2009 Mark Benjamin <[email protected]>
> + *
> + * This file is part of NetSurf, http://www.netsurf-browser.org/
> + *
> + * NetSurf is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * NetSurf is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <ctype.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <libxml/HTMLtree.h>
> +#include "desktop/save_complete.h"
> +#include "utils/utils.h"
> +
> +/**
> + * Write a block of data to a file inside the save directory.
> + *
> + * \param path directory to save into
> + * \param filename leaf name of the file to create
> + * \param c content being saved, or NULL; not used by this front end
> + * \param len number of bytes of sourcedata to write
> + * \param sourcedata data to write
> + * \param type RISC OS filetype; not used by this front end
> + * \return true on success, false on failure
> + */
> +bool save_complete_gui_save(const char *path, const char *filename,
> +		struct content *c, int len, char *sourcedata, int type)
> +{
> +	size_t namelen;
> +	size_t written;
> +	char *fullpath;
> +	FILE *f;
> +
> +	if (len < 0)
> +		return false;
> +
> +	/* room for path, separator, filename and terminator */
> +	namelen = strlen(path) + strlen(filename) + 2;
> +	fullpath = malloc(namelen);
> +	if (!fullpath) {
> +		warn_user("NoMemory", 0);
> +		return false;
> +	}
> +	snprintf(fullpath, namelen, "%s/%s", path, filename);
> +
> +	/* saved objects may be binary, so never open in text mode */
> +	f = fopen(fullpath, "wb");
> +	free(fullpath);
> +	if (f == NULL)
> +		return false;
> +
> +	/* count bytes, not one len-sized item, so that an empty file
> +	 * is not reported as a failed write */
> +	written = fwrite(sourcedata, 1, (size_t) len, f);
> +
> +	/* fclose() flushes buffered data, so its result matters too */
> +	if (fclose(f) != 0)
> +		return false;
> +
> +	return written == (size_t) len;
> +}
> +
> +/**
> + * Wrapper for htmlSaveFileFormat() that joins path and filename with '/'.
> + *
> + * \return result of htmlSaveFileFormat(), or -1 on out of memory
> + */
> +int save_complete_htmlSaveFileFormat(const char *path, const char *filename,
> +		xmlDocPtr cur, const char *encoding, int format)
> +{
> +	int ret;
> +	size_t len = strlen(path) + strlen(filename) + 2;
> +	char *finame = malloc(len);
> +	if (!finame) {
> +		warn_user("NoMemory", 0);
> +		return -1;
> +	}
> +	snprintf(finame, len, "%s/%s", path, filename);
> +	ret = htmlSaveFileFormat(finame, cur, encoding, format);
> +	free(finame);
> +	return ret;
> +}
> +
> +/**
> + * Filetype hook; this front end does nothing and reports success.
> + */
> +bool save_complete_gui_filetype(const char *path, const char *filename,
> +		int type)
> +{
> +	return true;
> +}
> Index: desktop/save_complete.c
> from save branch; largely unmodified from riscos; full file system integration is the front's responsibility; core sends filename + directory path that the front has already sent it;
> ===================================================================
> --- /dev/null 2009-04-16 19:17:07.000000000 +0100
> +++ desktop/save_complete.c 2009-07-10 12:50:20.000000000 +0100
> @@ -0,0 +1,742 @@
> +/*
> + * Copyright 2004 John M Bell <[email protected]>
> + * Copyright 2004-2007 James Bursa <[email protected]>
> + *
> + * This file is part of NetSurf,
http://www.netsurf-browser.org/ > + * > + * NetSurf is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; version 2 of the License. > + * > + * NetSurf is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +/** \file > + * Save HTML document with dependencies (implementation). > + */ > + > +#include "utils/config.h" > + > +#define _GNU_SOURCE /* for strndup */ > +#include <assert.h> > +#include <ctype.h> > +#include <errno.h> > +#include <stdio.h> > +#include <string.h> > +#include <sys/types.h> > +#include <regex.h> > +#include <libxml/HTMLtree.h> > +#include <libxml/parserInternals.h> > +#include "utils/config.h" > +#include "content/content.h" > +#include "css/css.h" > +#include "render/box.h" > +#include "desktop/save_complete.h" > +#include "utils/log.h" > +#include "utils/url.h" > +#include "utils/utils.h" > + > +regex_t save_complete_import_re; > + > +/** An entry in save_complete_list. */ > +struct save_complete_entry { > + struct content *content; > + struct save_complete_entry *next; /**< Next entry in list */ > +}; > + > +#ifdef RISCOS > + static char pathsep = '.'; > +#else > + static char pathsep = '/'; > +#endif > + > +/** List of urls seen and saved so far. 
*/ > +static struct save_complete_entry *save_complete_list = 0; > + > +static bool save_complete_html(struct content *c, const char *path, > + bool index); > +static bool save_imported_sheets(struct content *c, const char *path); > +static char * rewrite_stylesheet_urls(const char *source, unsigned int size, > + int *osize, const char *base); > +static bool rewrite_document_urls(xmlDoc *doc, const char *base); > +static bool rewrite_urls(xmlNode *n, const char *base); > +static bool rewrite_url(xmlNode *n, const char *attr, const char *base); > +static bool save_complete_list_add(struct content *content); > +static struct content * save_complete_list_find(const char *url); > +static bool save_complete_list_check(struct content *content); > +/* static void save_complete_list_dump(void); */ > +static bool save_complete_inventory(const char *path); > + > +/** > + * Save an HTML page with all dependencies. > + * > + * \param c CONTENT_HTML to save > + * \param path directory to save to (must exist) > + * \return true on success, false on error and error reported > + */ > + > +bool save_complete(struct content *c, const char *path) > +{ > + bool result; > + > + result = save_complete_html(c, path, true); > + > + if (result) > + result = save_complete_inventory(path); > + > + /* free save_complete_list */ > + while (save_complete_list) { > + struct save_complete_entry *next = save_complete_list->next; > + free(save_complete_list); > + save_complete_list = next; > + } > + > + return result; > +} > + > + > +/** > + * Save an HTML page with all dependencies, recursing through imported pages. 
> + * > + * \param c CONTENT_HTML to save > + * \param path directory to save to (must exist) > + * \param index true to save as "index" > + * \return true on success, false on error and error reported > + */ > + > +bool save_complete_html(struct content *c, const char *path, bool index) > +{ > + char filename[256]; > + unsigned int i; > + xmlDocPtr doc; > + bool res; > + > + if (c->type != CONTENT_HTML) > + return false; > + > + if (save_complete_list_check(c)) > + return true; > + > + /* save stylesheets, ignoring the base and adblocking sheets */ > + for (i = STYLESHEET_STYLE; i != c->data.html.stylesheet_count; i++) { > + struct content *css = c->data.html.stylesheet_content[i]; > + char *source; > + int source_len; > + > + if (!css) > + continue; > + if (save_complete_list_check(css)) > + continue; > + > + if (i != STYLESHEET_STYLE) { > + if (!save_complete_list_add(css)) { > + warn_user("NoMemory", 0); > + return false; > + } > + } > + > + if (!save_imported_sheets(css, path)) > + return false; > + > + if (i == STYLESHEET_STYLE) > + continue; /* don't save <style> elements */ > + > + snprintf(filename, sizeof filename, "%x", (unsigned int) css); > + source = rewrite_stylesheet_urls(css->source_data, > + css->source_size, &source_len, css->url); > + if (!source) { > + warn_user("NoMemory", 0); > + return false; > + } > + res = save_complete_gui_save(path, filename, NULL, source_len, > + source, 0xf79); > + free(source); > + if (res == false) > + return false; > + } > + > + /* save objects */ > + for (i = 0; i != c->data.html.object_count; i++) { > + struct content *obj = c->data.html.object[i].content; > + > + /* skip difficult content types */ > + if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data) > + continue; > + if (save_complete_list_check(obj)) > + continue; > + > + if (!save_complete_list_add(obj)) { > + warn_user("NoMemory", 0); > + return false; > + } > + > + if (obj->type == CONTENT_HTML) { > + if (!save_complete_html(obj, path, false)) > + 
return false; > + continue; > + } > + > + snprintf(filename, sizeof filename, "%x", (unsigned int) obj); > + res = save_complete_gui_save(path, filename, obj, > + obj->source_size, obj->source_data, 0); > + if(res == false) > + return false; > + } > + > + /*save_complete_list_dump();*/ > + > + /* copy document */ > + doc = xmlCopyDoc(c->data.html.document, 1); > + if (doc == NULL) { > + warn_user("NoMemory", 0); > + return false; > + } > + > + /* rewrite all urls we know about */ > + if (!rewrite_document_urls(doc, c->data.html.base_url)) { > + xmlFreeDoc(doc); > + warn_user("NoMemory", 0); > + return false; > + } > + > + /* save the html file out last of all */ > + if (index) > + snprintf(filename, sizeof filename, "index"); > + else > + snprintf(filename, sizeof filename, "%x", (unsigned int)c); > + > + errno = 0; > + if (save_complete_htmlSaveFileFormat(path, filename, doc, 0, 0) == -1) { > + if (errno) > + warn_user("SaveError", strerror(errno)); > + else > + warn_user("SaveError", "htmlSaveFileFormat failed"); > + > + xmlFreeDoc(doc); > + return false; > + } > + > + xmlFreeDoc(doc); > + > + return save_complete_gui_filetype(path, filename, 0xfaf); > +} > + > + > +/** > + * Save stylesheets imported by a CONTENT_CSS. 
> + * > + * \param c a CONTENT_CSS > + * \param path path to save to > + * \return true on success, false on error and error reported > + */ > + > +bool save_imported_sheets(struct content *c, const char *path) > +{ > + char filename[256]; > + unsigned int j; > + char *source; > + int source_len; > + bool res; > + > + for (j = 0; j != c->data.css.import_count; j++) { > + struct content *css = c->data.css.import_content[j]; > + > + if (!css) > + continue; > + if (save_complete_list_check(css)) > + continue; > + > + if (!save_complete_list_add(css)) { > + warn_user("NoMemory", 0); > + return false; > + } > + > + if (!save_imported_sheets(css, path)) > + return false; > + > + snprintf(filename, sizeof filename, "%x", (unsigned int) css); > + source = rewrite_stylesheet_urls(css->source_data, > + css->source_size, &source_len, css->url); > + if (!source) { > + warn_user("NoMemory", 0); > + return false; > + } > + > + res = save_complete_gui_save(path, filename, NULL, source_len, > + source, 0xf79); > + free(source); > + if (res == false) > + return false; > + } > + > + return true; > +} > + > + > +/** > + * Initialise the save_complete module. > + */ > + > +void save_complete_init(void) > +{ > + /* Match an @import rule - see CSS 2.1 G.1. */ > + regcomp_wrapper(&save_complete_import_re, > + "@import" /* IMPORT_SYM */ > + "[ \t\r\n\f]*" /* S* */ > + /* 1 */ > + "(" /* [ */ > + /* 2 3 */ > + "\"(([^\"]|[\\]\")*)\"" /* STRING (approximated) */ > + "|" > + /* 4 5 */ > + "'(([^']|[\\]')*)'" > + "|" /* | */ > + "url\\([ \t\r\n\f]*" /* URI (approximated) */ > + /* 6 7 */ > + "\"(([^\"]|[\\]\")*)\"" > + "[ \t\r\n\f]*\\)" > + "|" > + "url\\([ \t\r\n\f]*" > + /* 8 9 */ > + "'(([^']|[\\]')*)'" > + "[ \t\r\n\f]*\\)" > + "|" > + "url\\([ \t\r\n\f]*" > + /* 10 */ > + "([^) \t\r\n\f]*)" > + "[ \t\r\n\f]*\\)" > + ")", /* ] */ > + REG_EXTENDED | REG_ICASE); > +} > + > + > +/** > + * Rewrite stylesheet \...@import rules for save complete. 
> + *
> + * @param source stylesheet source (not necessarily 0-terminated)
> + * @param size size of source
> + * @param osize updated with the size of the result
> + * @param base url of stylesheet
> + * @return converted source (caller frees), or 0 on out of memory
> + */
> +
> +char * rewrite_stylesheet_urls(const char *source, unsigned int size,
> +		int *osize, const char *base)
> +{
> +	/* capture groups of save_complete_import_re that can hold the url */
> +	static const int url_groups[] = { 2, 4, 6, 8, 10 };
> +	char *res;
> +	char *text;
> +	char buf[20];
> +	unsigned int offset = 0;
> +	unsigned int i;
> +	unsigned int imports = 0;
> +	regmatch_t match[11];
> +
> +	/* count number occurences of @import to (over)estimate result size */
> +	/* can't use strstr because source is not 0-terminated string */
> +	for (i = 0; 7 < size && i != size - 7; i++) {
> +		/* <ctype.h> functions are only defined for unsigned char
> +		 * values, so cast before calling tolower() */
> +		if (source[i] == '@' &&
> +		    tolower((unsigned char) source[i + 1]) == 'i' &&
> +		    tolower((unsigned char) source[i + 2]) == 'm' &&
> +		    tolower((unsigned char) source[i + 3]) == 'p' &&
> +		    tolower((unsigned char) source[i + 4]) == 'o' &&
> +		    tolower((unsigned char) source[i + 5]) == 'r' &&
> +		    tolower((unsigned char) source[i + 6]) == 't')
> +			imports++;
> +	}
> +
> +	/* regexec() requires a 0-terminated string, which source is not,
> +	 * so the search runs over a terminated private copy */
> +	text = malloc(size + 1);
> +	if (!text)
> +		return 0;
> +	memcpy(text, source, size);
> +	text[size] = '\0';
> +
> +	/* + 1 so that an empty stylesheet never turns into malloc(0),
> +	 * whose NULL result would be mistaken for out of memory */
> +	res = malloc(size + imports * 20 + 1);
> +	if (!res) {
> +		free(text);
> +		return 0;
> +	}
> +	*osize = 0;
> +
> +	while (offset < size) {
> +		const char *url = 0;
> +		int url_len = 0;
> +		char *url2;
> +		char *joined = 0;
> +		struct content *content = 0;
> +		url_func_result result;
> +
> +		if (regexec(&save_complete_import_re, text + offset,
> +				11, match, 0))
> +			break;
> +
> +		/* the first populated url group is the one that matched */
> +		for (i = 0; i != sizeof url_groups / sizeof url_groups[0];
> +				i++) {
> +			int g = url_groups[i];
> +			if (match[g].rm_so != -1) {
> +				url = text + offset + match[g].rm_so;
> +				url_len = match[g].rm_eo - match[g].rm_so;
> +				break;
> +			}
> +		}
> +		assert(url);
> +
> +		url2 = strndup(url, url_len);
> +		if (!url2)
> +			goto no_memory;
> +		result = url_join(url2, base, &joined);
> +		free(url2);
> +		if (result == URL_FUNC_NOMEM)
> +			goto no_memory;
> +
> +		/* copy data before match */
> +		memcpy(res + *osize, source + offset, match[0].rm_so);
> +		*osize += match[0].rm_so;
> +
> +		if (result == URL_FUNC_OK) {
> +			content = save_complete_list_find(joined);
> +			/* url_join() allocated the joined url; it is only
> +			 * needed for the lookup, so release it here */
> +			free(joined);
> +		}
> +
> +		if (content) {
> +			/* replace import */
> +			snprintf(buf, sizeof buf, "@import '%x'",
> +					(unsigned int) content);
> +			memcpy(res + *osize, buf, strlen(buf));
> +			*osize += strlen(buf);
> +		} else {
> +			/* copy import */
> +			memcpy(res + *osize, source + offset + match[0].rm_so,
> +					match[0].rm_eo - match[0].rm_so);
> +			*osize += match[0].rm_eo - match[0].rm_so;
> +		}
> +
> +		assert(0 < match[0].rm_eo);
> +		offset += match[0].rm_eo;
> +	}
> +
> +	/* copy rest of source */
> +	if (offset < size) {
> +		memcpy(res + *osize, source + offset, size - offset);
> +		*osize += size - offset;
> +	}
> +
> +	free(text);
> +	return res;
> +
> +no_memory:
> +	free(text);
> +	free(res);
> +	return 0;
> +}
> +
> +
> +/**
> + * Rewrite URLs in a HTML document to be relative.
> + *
> + * \param doc root of the document tree
> + * \param base base url of document
> + * \return true on success, false on out of memory
> + */
> +
> +bool rewrite_document_urls(xmlDoc *doc, const char *base)
> +{
> +	xmlNode *node;
> +
> +	for (node = doc->children; node; node = node->next)
> +		if (node->type == XML_ELEMENT_NODE)
> +			if (!rewrite_urls(node, base))
> +				return false;
> +
> +	return true;
> +}
> +
> +
> +/**
> + * Traverse tree, rewriting URLs as we go.
> + *
> + * \param n xmlNode of type XML_ELEMENT_NODE to rewrite
> + * \param base base url of document
> + * \return true on success, false on out of memory
> + *
> + * URLs in the tree rooted at element n are rewritten.
> + */
> +
> +bool rewrite_urls(xmlNode *n, const char *base)
> +{
> +	xmlNode *child;
> +
> +	assert(n->type == XML_ELEMENT_NODE);
> +
> +	/**
> +	 * We only need to consider the following cases:
> +	 *
> +	 * Attribute:      Elements:
> +	 *
> +	 * 1) data         <object>
> +	 * 2) href         <a> <area> <link>
> +	 * 3) src          <script> <input> <frame> <iframe> <img>
> +	 * 4) n/a          <style>
> +	 * 5) n/a          any <base> tag
> +	 * 6) background   any (except those above)
> +	 */
> +	if (!n->name) {
> +		/* ignore */
> +	}
> +	/* 1 */
> +	else if (strcmp((const char *) n->name, "object") == 0) {
> +		if (!rewrite_url(n, "data", base))
> +			return false;
> +	}
> +	/* 2 */
> +	else if (strcmp((const char *) n->name, "a") == 0 ||
> +			strcmp((const char *) n->name, "area") == 0 ||
> +			strcmp((const char *) n->name, "link") == 0) {
> +		if (!rewrite_url(n, "href", base))
> +			return false;
> +	}
> +	/* 3 */
> +	else if (strcmp((const char *) n->name, "frame") == 0 ||
> +			strcmp((const char *) n->name, "iframe") == 0 ||
> +			strcmp((const char *) n->name, "input") == 0 ||
> +			strcmp((const char *) n->name, "img") == 0 ||
> +			strcmp((const char *) n->name, "script") == 0) {
> +		if (!rewrite_url(n, "src", base))
> +			return false;
> +	}
> +	/* 4 */
> +	else if (strcmp((const char *) n->name, "style") == 0) {
> +		int len;
> +		xmlChar *content;
> +		char *rewritten;
> +
> +		for (child = n->children; child != 0; child = child->next) {
> +			/* Get current content */
> +			content = xmlNodeGetContent(child);
> +			if (!content)
> +				/* unfortunately we don't know if this is
> +				 * due to memory exhaustion, or because
> +				 * there is no content for this node */
> +				continue;
> +
> +			/* Rewrite @import rules */
> +			rewritten = rewrite_stylesheet_urls(
> +					(const char *) content,
> +					strlen((const char *) content),
> +					&len, base);
> +			xmlFree(content);
> +			if (!rewritten)
> +				return false;
> +
> +			/* set new content */
> +			xmlNodeSetContentLen(child,
> +					(const xmlChar*)rewritten,
> +					len);
> +			/* xmlNodeSetContentLen() takes a const pointer and
> +			 * does not take ownership, so the buffer returned
> +			 * by rewrite_stylesheet_urls() is freed here */
> +			free(rewritten);
> +		}
> +
> +		return true;
> +	}
> +	/* 5 */
> +	else if (strcmp((const char *) n->name, "base") == 0) {
> +		/* simply remove any <base> tags from the document */
> +		xmlUnlinkNode(n);
> +		xmlFreeNode(n);
> +		/* base tags have no content, so there's no point recursing
> +		 * additionally, we've just destroyed this node, so trying
> +		 * to recurse would result in bad things happening */
> +		return true;
> +	}
> +	/* 6 */
> +	else {
> +		if (!rewrite_url(n, "background", base))
> +			return false;
> +	}
> +
> +	/* now recurse */
> +	for (child = n->children; child;) {
> +		/* we must extract the next child now, as if the current
> +		 * child is a <base> element, it will be removed from the
> +		 * tree (see 5, above), thus preventing extraction of the
> +		 * next child */
> +		xmlNode *next = child->next;
> +		if (child->type == XML_ELEMENT_NODE) {
> +			if (!rewrite_urls(child, base))
> +				return false;
> +		}
> +		child = next;
> +	}
> +
> +	return true;
> +}
> +
> +
> +/**
> + * Rewrite an URL in a HTML document.
> + * > + * \param n The node to modify > + * \param attr The html attribute to modify > + * \param base base url of document > + * \return true on success, false on out of memory > + */ > + > +bool rewrite_url(xmlNode *n, const char *attr, const char *base) > +{ > + char *url, *data; > + char rel[20]; > + struct content *content; > + url_func_result res; > + > + if (!xmlHasProp(n, (const xmlChar *) attr)) > + return true; > + > + data = (char *) xmlGetProp(n, (const xmlChar *) attr); > + if (!data) > + return false; > + > + res = url_join(data, base, &url); > + xmlFree(data); > + if (res == URL_FUNC_NOMEM) > + return false; > + else if (res == URL_FUNC_OK) { > + content = save_complete_list_find(url); > + if (content) { > + /* found a match */ > + free(url); > + snprintf(rel, sizeof rel, "%x", > + (unsigned int) content); > + if (!xmlSetProp(n, (const xmlChar *) attr, > + (xmlChar *) rel)) > + return false; > + } else { > + /* no match found */ > + if (!xmlSetProp(n, (const xmlChar *) attr, > + (xmlChar *) url)) { > + free(url); > + return false; > + } > + free(url); > + } > + } > + > + return true; > +} > + > + > +/** > + * Add a content to the save_complete_list. > + * > + * \param content content to add > + * \return true on success, false on out of memory > + */ > + > +bool save_complete_list_add(struct content *content) > +{ > + struct save_complete_entry *entry; > + entry = malloc(sizeof (*entry)); > + if (!entry) > + return false; > + entry->content = content; > + entry->next = save_complete_list; > + save_complete_list = entry; > + return true; > +} > + > + > +/** > + * Look up a url in the save_complete_list. 
> + * > + * \param url url to find > + * \return content if found, 0 otherwise > + */ > + > +struct content * save_complete_list_find(const char *url) > +{ > + struct save_complete_entry *entry; > + for (entry = save_complete_list; entry; entry = entry->next) > + if (strcmp(url, entry->content->url) == 0) > + return entry->content; > + return 0; > +} > + > + > +/** > + * Look up a content in the save_complete_list. > + * > + * \param content pointer to content > + * \return true if the content is in the save_complete_list > + */ > + > +bool save_complete_list_check(struct content *content) > +{ > + struct save_complete_entry *entry; > + for (entry = save_complete_list; entry; entry = entry->next) > + if (entry->content == content) > + return true; > + return false; > +} > + > + > +#if 0 > +/** > + * Dump save complete list to stderr > + */ > +void save_complete_list_dump(void) > +{ > + struct save_complete_entry *entry; > + for (entry = save_complete_list; entry; entry = entry->next) > + fprintf(stderr, "%p : %s\n", entry->content, > + entry->content->url); > +} > +#endif > + > + > +/** > + * Create the inventory file listing original URLs. 
> + */ > + > +bool save_complete_inventory(const char *path) > +{ > + char spath[256]; > + FILE *fp; > + char *pathstring; > + > + pathstring = strdup("%s/Inventory"); > + if (!pathstring) { > + warn_user("NoMemory", 0); > + return false; > + } > + pathstring[2] = pathsep; > + snprintf(spath, sizeof spath, pathstring, path); > + free(pathstring); > + > + fp = fopen(spath, "w"); > + if (!fp) { > + LOG(("fopen(): errno = %i", errno)); > + warn_user("SaveError", strerror(errno)); > + return false; > + } > + > + struct save_complete_entry *entry; > + for (entry = save_complete_list; entry; entry = entry->next) > + fprintf(fp, "%x %s\n", > + (unsigned int) entry->content, > + entry->content->url); > + > + fclose(fp); > + > + return true; > +} > + > Index: desktop/save_complete.h > =================================================================== > --- /dev/null 2009-04-16 19:17:07.000000000 +0100 > +++ desktop/save_complete.h 2009-07-10 12:50:22.000000000 +0100 > @@ -0,0 +1,59 @@ > +/* > + * Copyright 2004 John M Bell <[email protected]> > + * Copyright 2009 Mark Benjamin <[email protected]> > + * > + * This file is part of NetSurf, http://www.netsurf-browser.org/ > + * > + * NetSurf is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; version 2 of the License. > + * > + * NetSurf is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +/** \file > + * Save HTML document with dependencies (interface). 
> + */ > + > +#ifndef _NETSURF_DESKTOP_SAVE_COMPLETE_H_ > +#define _NETSURF_DESKTOP_SAVE_COMPLETE_H_ > + > +#include <stdbool.h> > +#include <libxml/HTMLtree.h> > + > +struct content; > + > +void save_complete_init(void); > +bool save_complete(struct content *c, const char *path); > + > +/** > + * conducts the filesystem save appropriate to the gui > + * \param path save path > + * \param filename name of file to save > + * \param c content to save, or NULL > + * \param len data length > + * \param sourcedata pointer to data to save, NULL when all data in > sourcedata > + * \param type integer filetype [riscos] > + * \return true for success > + */ > +bool save_complete_gui_save(const char *path, const char *filename, struct > content *c, int len, char *sourcedata, int type); > + > +/** > + * wrapper for lib function htmlSaveFileFormat > + */ > +int save_complete_htmlSaveFileFormat(const char *path, const char *filename, > + xmlDocPtr cur, const char *encoding, int format); > + > +/** > + * mainly implemented as a wrapper for riscos > + */ > +bool save_complete_gui_filetype(const char *path, const char *filename, > + int type); > + > +#endif > Index: riscos/save.c > riscos filesystem-specific code for save migration to core in save branch > =================================================================== > --- riscos/save.c (revision 8438) > +++ riscos/save.c (working copy) > @@ -36,6 +36,7 @@ > #include "oslib/wimp.h" > #include "oslib/wimpspriteop.h" > #include "desktop/netsurf.h" > +#include "desktop/save_complete.h" > #include "desktop/save_text.h" > #include "desktop/selection.h" > #include "image/bitmap.h" > @@ -48,7 +49,6 @@ > #include "riscos/options.h" > #include "riscos/query.h" > #include "riscos/save.h" > -#include "riscos/save_complete.h" > #include "riscos/save_draw.h" > #include "riscos/save_pdf.h" > #include "riscos/textselection.h" > @@ -986,7 +986,69 @@ > gui_save_content = 0; > } > > +bool save_complete_gui_save(const char *path, const char 
*filename, struct
> +		content *c, int len, char *sourcedata, int type)
> +{
> +	char *finame;
> +	int namelen = strlen(path) + strlen(filename) + 2;
> +	finame = malloc(namelen);
> +	if (!finame) {
> +		warn_user("NoMemory", 0);
> +		return false;
> +	}
> +	snprintf(finame, namelen, "%s.%s", path, filename);
> +	os_error *error;
> +	if (c != NULL)
> +		type = ro_content_filetype(c);
> +	error = xosfile_save_stamped(finame, type, (byte *) sourcedata,
> +			(byte *) sourcedata + len);
> +	free(finame);
> +	if (error) {
> +		LOG(("xosfile_save_stamped: 0x%x: %s",
> +				error->errnum, error->errmess));
> +		warn_user("SaveError", error->errmess);
> +		return false;
> +	}
> +	return true;
> +}
>
> +int save_complete_htmlSaveFileFormat(const char *path, const char *filename,
> +		xmlDocPtr cur, const char *encoding, int format)
> +{
> +	int ret;
> +	int len = strlen(path) + strlen(filename) + 2;
> +	char *finame = malloc(len);
> +	if (!finame){
> +		warn_user("NoMemory", 0);
> +		return -1;
> +	}
> +	snprintf(finame, len, "%s.%s", path, filename);
> +	ret = htmlSaveFileFormat(finame, cur, encoding, format);
> +	free(finame);
> +	return ret;
> +}
> +
> +/**
> + * Set the filetype of a file in the save directory.
> + *
> + * \param path directory containing the file
> + * \param filename leaf name of the file
> + * \param type filetype passed to xosfile_set_type()
> + * \return true on success, false on error and error reported
> + */
> +bool save_complete_gui_filetype(const char *path, const char *filename,
> +		int type)
> +{
> +	os_error *error;
> +	int len = strlen(path) + strlen(filename) + 2;
> +	char *finame = malloc(len);
> +	if (!finame) {
> +		warn_user("NoMemory", 0);
> +		/* must be false: in a bool function -1 converts to true,
> +		 * which would report out of memory as success */
> +		return false;
> +	}
> +	snprintf(finame, len, "%s.%s", path, filename);
> +	error = xosfile_set_type(finame, type);
> +	free(finame);
> +	if (error) {
> +		LOG(("xosfile_set_type: 0x%x: %s",
> +				error->errnum, error->errmess));
> +		warn_user("SaveError", error->errmess);
> +		return false;
> +	}
> +	return true;
> +}
> +
>  /**
>   * Prepare an application directory and save_complete() to it.
> * > Index: amiga/menu.c > =================================================================== > --- amiga/menu.c (revision 8438) > +++ amiga/menu.c (working copy) > @@ -30,13 +30,13 @@ > #include "amiga/save_pdf.h" > #include "desktop/save_text.h" > #include "desktop/save_pdf/pdf_plotters.h" > +#include "desktop/save_complete.h" > #include <string.h> > #include "amiga/tree.h" > #include "amiga/history.h" > #include "amiga/cookies.h" > #include <proto/exec.h> > #include "amiga/arexx.h" > -#include "amiga/save_complete.h" > #include "utils/url.h" > #include <dos/anchorpath.h> > #include "desktop/textinput.h" > Index: amiga/download.c > =================================================================== > --- amiga/download.c (revision 8438) > +++ amiga/download.c (working copy) > @@ -29,13 +29,13 @@ > #include "amiga/download.h" > #include "amiga/object.h" > #include "amiga/options.h" > -#include "amiga/save_complete.h" > #include "amiga/bitmap.h" > #include "amiga/iff_dr2d.h" > > #include "content/fetch.h" > > #include "desktop/selection.h" > +#include "desktop/save_complete.h" > > #include "utils/messages.h" > #include "utils/utils.h" > Index: amiga/save_complete.c > migration of most of save logic to core in save branch; file system interface functions in amiga front > =================================================================== > --- amiga/save_complete.c (revision 8438) > +++ amiga/save_complete.c (working copy) > @@ -2,6 +2,7 @@ > * Copyright 2004 John M Bell <[email protected]> > * Copyright 2004-2007 James Bursa <[email protected]> > * Copyright 2008 Chris Young <[email protected]> > + * Copyright 2009 Mark Benjamin <[email protected]> > * > * This file is part of NetSurf, http://www.netsurf-browser.org/ > * > @@ -18,804 +19,90 @@ > * along with this program. If not, see <http://www.gnu.org/licenses/>. > */ > > -/** \file > - * Save HTML document with dependencies (implementation). 
> - */ > - > -#include "utils/config.h" > -//#define _GNU_SOURCE /* for strndup */ > -#include <assert.h> > #include <ctype.h> > -#include <errno.h> > #include <stdio.h> > #include <string.h> > -#include <sys/types.h> > -#include <regex.h> > #include <libxml/HTMLtree.h> > -#include <libxml/parserInternals.h> > -#include "utils/config.h" > -#include "css/css.h" > -#include "render/box.h" > -#include "amiga/save_complete.h" > -#include "utils/log.h" > -#include "utils/url.h" > +#include "desktop/save_complete.h" > #include "utils/utils.h" > -#include <proto/dos.h> > #include <proto/icon.h> > #include <workbench/icon.h> > > -regex_t save_complete_import_re; > - > -/** An entry in save_complete_list. */ > -struct save_complete_entry { > - struct content *content; > - struct save_complete_entry *next; /**< Next entry in list */ > -}; > - > -/** List of urls seen and saved so far. */ > -static struct save_complete_entry *save_complete_list = 0; > - > -static bool save_complete_html(struct content *c, const char *path, > - bool index); > -static bool save_imported_sheets(struct content *c, const char *path); > -static char * rewrite_stylesheet_urls(const char *source, unsigned int size, > - int *osize, const char *base); > -static bool rewrite_document_urls(xmlDoc *doc, const char *base); > -static bool rewrite_urls(xmlNode *n, const char *base); > -static bool rewrite_url(xmlNode *n, const char *attr, const char *base); > -static bool save_complete_list_add(struct content *content); > -static struct content * save_complete_list_find(const char *url); > -static bool save_complete_list_check(struct content *content); > -/* static void save_complete_list_dump(void); */ > -static bool save_complete_inventory(const char *path); > - > -/** > - * Save an HTML page with all dependencies. 
> - * > - * \param c CONTENT_HTML to save > - * \param path directory to save to (must exist) > - * \return true on success, false on error and error reported > - */ > - > -bool save_complete(struct content *c, const char *path) > +bool save_complete_gui_save(const char *path, const char *filename, struct > content *c, int len, char *sourcedata, int type) > { > - bool result; > - > - result = save_complete_html(c, path, true); > - > - if (result) > - result = save_complete_inventory(path); > - > - /* free save_complete_list */ > - while (save_complete_list) { > - struct save_complete_entry *next = save_complete_list->next; > - free(save_complete_list); > - save_complete_list = next; > - } > - > - return result; > -} > - > - > -/** > - * Save an HTML page with all dependencies, recursing through imported pages. > - * > - * \param c CONTENT_HTML to save > - * \param path directory to save to (must exist) > - * \param index true to save as "index" > - * \return true on success, false on error and error reported > - */ > - > -bool save_complete_html(struct content *c, const char *path, bool index) > -{ > - char spath[256]; > - unsigned int i; > - htmlParserCtxtPtr parser; > - BPTR fh = 0; > - > - if (c->type != CONTENT_HTML) > - return false; > - > - if (save_complete_list_check(c)) > - return true; > - > - /* save stylesheets, ignoring the base and adblocking sheets */ > - for (i = STYLESHEET_STYLE; i != c->data.html.stylesheet_count; i++) { > - struct content *css = c->data.html.stylesheet_content[i]; > - char *source; > - int source_len; > - > - if (!css) > - continue; > - if (save_complete_list_check(css)) > - continue; > - > - if (i != STYLESHEET_STYLE) { > - if (!save_complete_list_add(css)) { > - warn_user("NoMemory", 0); > - return false; > - } > - } > - > - if (!save_imported_sheets(css, path)) > - return false; > - > - if (i == STYLESHEET_STYLE) > - continue; /* don't save <style> elements */ > - > - snprintf(spath, sizeof spath, "%s/%x", path, > - (unsigned 
int) css); > - source = rewrite_stylesheet_urls(css->source_data, > - css->source_size, &source_len, css->url); > - if (!source) { > - warn_user("NoMemory", 0); > - return false; > - } > - > -/* > - error = xosfile_save_stamped(spath, 0xf79, source, > - source + source_len); > -*/ > - > - if(fh = FOpen(spath,MODE_NEWFILE,0)) > - { > - FWrite(fh,source,1,source_len); > - FClose(fh); > - SetComment(spath,c->url); > - } > - > - free(source); > -/* > - if (error) { > - LOG(("xosfile_save_stamped: 0x%x: %s", > - error->errnum, error->errmess)); > - warn_user("SaveError", error->errmess); > - return false; > - } > -*/ > - } > - > - /* save objects */ > - for (i = 0; i != c->data.html.object_count; i++) { > - struct content *obj = c->data.html.object[i].content; > - > - /* skip difficult content types */ > - if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data) > - continue; > - if (save_complete_list_check(obj)) > - continue; > - > - if (!save_complete_list_add(obj)) { > - warn_user("NoMemory", 0); > - return false; > - } > - > - if (obj->type == CONTENT_HTML) { > - if (!save_complete_html(obj, path, false)) > - return false; > - continue; > - } > - > - snprintf(spath, sizeof spath, "%s/%x", path, > - (unsigned int) obj); > -/* > - error = xosfile_save_stamped(spath, > - ro_content_filetype(obj), > - obj->source_data, > - obj->source_data + obj->source_size); > - if (error) { > - LOG(("xosfile_save_stamped: 0x%x: %s", > - error->errnum, error->errmess)); > - warn_user("SaveError", error->errmess); > - return false; > - } > -*/ > - if(fh = FOpen(spath,MODE_NEWFILE,0)) > - { > - FWrite(fh,obj->source_data,1,obj->source_size); > - FClose(fh); > - SetComment(spath,obj->url); > - } > - > - } > - > - /*save_complete_list_dump();*/ > - > - /* make a copy of the document tree */ > - parser = htmlCreateMemoryParserCtxt(c->source_data, c->source_size); > - if (!parser) { > + int res; > + int namelen; > + namelen = strlen(path) + strlen(filename) + 2; > + char *fullpath = 
malloc(namelen); > + if (!fullpath) { > warn_user("NoMemory", 0); > return false; > } > - /* set parser charset */ > - if (c->data.html.encoding) { > - xmlCharEncodingHandler *enc_handler; > - enc_handler = > - xmlFindCharEncodingHandler(c->data.html.encoding); > - if (enc_handler) { > - xmlCtxtResetLastError(parser); > - if (xmlSwitchToEncoding(parser, enc_handler)) { > - xmlFreeDoc(parser->myDoc); > - htmlFreeParserCtxt(parser); > - warn_user("MiscError", > - "Encoding switch failed"); > - return false; > - } > - } > - } > - > - htmlParseDocument(parser); > - > - /* rewrite all urls we know about */ > - if (!rewrite_document_urls(parser->myDoc, c->data.html.base_url)) { > - xmlFreeDoc(parser->myDoc); > - htmlFreeParserCtxt(parser); > - warn_user("NoMemory", 0); > + snprintf(fullpath, namelen, "%s/%s", path, filename); > + FILE *f = fopen(fullpath, "w"); /* may need mode 'b' when c != NULL */ > + free(fullpath); > + if (f == NULL) > return false; > - } > - > - /* save the html file out last of all */ > - if (index) > - { > - struct DiskObject *dobj = NULL; > - > - snprintf(spath, sizeof spath, "%s/index", path); > - > - dobj = GetIconTags(NULL,ICONGETA_GetDefaultName,"html", > - > ICONGETA_GetDefaultType,WBPROJECT, > - TAG_DONE); > - > - PutIconTags(spath,dobj, > - ICONPUTA_NotifyWorkbench,TRUE, > - TAG_DONE); > - } > - else > - { > - snprintf(spath, sizeof spath, "%s/%x", path, (unsigned int)c); > - } > - > - errno = 0; > - if (htmlSaveFileFormat(spath, parser->myDoc, 0, 0) == -1) { > - if (errno) > - warn_user("SaveError", strerror(errno)); > - else > - warn_user("SaveError", "htmlSaveFileFormat failed"); > + res = fwrite(sourcedata, len, 1, f); > + fclose(f); > + save_complete_gui_filetype(path, filename, type); > + if (res != 1) > return false; > - } > - > -/* > - error = xosfile_set_type(spath, 0xfaf); > - if (error) { > - LOG(("xosfile_set_type: 0x%x: %s", > - error->errnum, error->errmess)); > - warn_user("SaveError", error->errmess); > - return false; > - 
} > -*/ > - > - xmlFreeDoc(parser->myDoc); > - htmlFreeParserCtxt(parser); > - > return true; > } > > - > -/** > - * Save stylesheets imported by a CONTENT_CSS. > - * > - * \param c a CONTENT_CSS > - * \param path path to save to > - * \return true on success, false on error and error reported > - */ > - > -bool save_imported_sheets(struct content *c, const char *path) > +int save_complete_htmlSaveFileFormat(const char *path, const char *filename, > + xmlDocPtr cur, const char *encoding, int format) > { > - char spath[256]; > - unsigned int j; > - char *source; > - int source_len; > - BPTR fh = 0; > - > - for (j = 0; j != c->data.css.import_count; j++) { > - struct content *css = c->data.css.import_content[j]; > - > - if (!css) > - continue; > - if (save_complete_list_check(css)) > - continue; > - > - if (!save_complete_list_add(css)) { > - warn_user("NoMemory", 0); > - return false; > - } > - > - if (!save_imported_sheets(css, path)) > - return false; > - > - snprintf(spath, sizeof spath, "%s/%x", path, > - (unsigned int) css); > - source = rewrite_stylesheet_urls(css->source_data, > - css->source_size, &source_len, css->url); > - if (!source) { > - warn_user("NoMemory", 0); > - return false; > - } > - > - if(fh = FOpen(spath,MODE_NEWFILE,0)) > - { > - FWrite(fh,source,1,source_len); > - FClose(fh); > - SetComment(spath,c->url); > - } > -/* > - error = xosfile_save_stamped(spath, 0xf79, source, > - source + source_len); > -*/ > - free(source); > -/* > - if (error) { > - LOG(("xosfile_save_stamped: 0x%x: %s", > - error->errnum, error->errmess)); > - warn_user("SaveError", error->errmess); > - return false; > - } > -*/ > + int ret; > + int len = strlen(path) + strlen(filename) + 2; > + char *finame = malloc(len); > + if (!finame){ > + warn_user("NoMemory", 0); > + return -1; > } > - > - return true; > + snprintf(finame, len, "%s/%s", path, filename); > + ret = htmlSaveFileFormat(finame, cur, encoding, format); > + free(finame); > + return ret; > } > > - > -/** > - * 
Initialise the save_complete module. > - */ > - > -void save_complete_init(void) > +bool save_complete_gui_filetype(const char *path, const char *filename, > + int type) > { > - /* Match an @import rule - see CSS 2.1 G.1. */ > - regcomp_wrapper(&save_complete_import_re, > - "@import" /* IMPORT_SYM */ > - "[ \t\r\n\f]*" /* S* */ > - /* 1 */ > - "(" /* [ */ > - /* 2 3 */ > - "\"(([^\"]|[\\]\")*)\"" /* STRING (approximated) */ > - "|" > - /* 4 5 */ > - "'(([^']|[\\]')*)'" > - "|" /* | */ > - "url\\([ \t\r\n\f]*" /* URI (approximated) */ > - /* 6 7 */ > - "\"(([^\"]|[\\]\")*)\"" > - "[ \t\r\n\f]*\\)" > - "|" > - "url\\([ \t\r\n\f]*" > - /* 8 9 */ > - "'(([^']|[\\]')*)'" > - "[ \t\r\n\f]*\\)" > - "|" > - "url\\([ \t\r\n\f]*" > - /* 10 */ > - "([^) \t\r\n\f]*)" > - "[ \t\r\n\f]*\\)" > - ")", /* ] */ > - REG_EXTENDED | REG_ICASE); > -} > - > - > -/** > - * Rewrite stylesheet \@import rules for save complete. > - * > - * @param source stylesheet source > - * @param size size of source > - * @param osize updated with the size of the result > - * @param base url of stylesheet > - * @return converted source, or 0 on out of memory > - */ > - > -char * rewrite_stylesheet_urls(const char *source, unsigned int size, > - int *osize, const char *base) > -{ > - char *res; > - const char *url; > - char *url2; > - char buf[20]; > - unsigned int offset = 0; > - int url_len = 0; > - struct content *content; > - int m; > - unsigned int i; > - unsigned int imports = 0; > - regmatch_t match[11]; > - url_func_result result; > - > - /* count number occurences of @import to (over)estimate result size */ > - /* can't use strstr because source is not 0-terminated string */ > - for (i = 0; 7 < size && i != size - 7; i++) { > - if (source[i] == '@' && > - tolower(source[i + 1]) == 'i' && > - tolower(source[i + 2]) == 'm' && > - tolower(source[i + 3]) == 'p' && > - tolower(source[i + 4]) == 'o' && > - tolower(source[i + 5]) == 'r' && > - tolower(source[i + 6]) == 't') > - imports++; > + int 
ret; > + char deftype[5]; > + struct DiskObject *dobj = NULL; > + int len = strlen(path) + strlen(filename) + 2; > + char *finame = malloc(len); > + if (!finame){ > + warn_user("NoMemory", 0); > + return -1; > } > + snprintf(finame, len, "%s/%s", path, filename); > > - res = malloc(size + imports * 20); > - if (!res) > - return 0; > - *osize = 0; > - > - while (offset < size) { > - m = regexec(&save_complete_import_re, source + offset, > - 11, match, 0); > - if (m) > - break; > - > - /*for (unsigned int i = 0; i != 11; i++) { > - if (match[i].rm_so == -1) > - continue; > - fprintf(stderr, "%i: '%.*s'\n", i, > - match[i].rm_eo - match[i].rm_so, > - source + offset + match[i].rm_so); > - }*/ > - > - url = 0; > - if (match[2].rm_so != -1) { > - url = source + offset + match[2].rm_so; > - url_len = match[2].rm_eo - match[2].rm_so; > - } else if (match[4].rm_so != -1) { > - url = source + offset + match[4].rm_so; > - url_len = match[4].rm_eo - match[4].rm_so; > - } else if (match[6].rm_so != -1) { > - url = source + offset + match[6].rm_so; > - url_len = match[6].rm_eo - match[6].rm_so; > - } else if (match[8].rm_so != -1) { > - url = source + offset + match[8].rm_so; > - url_len = match[8].rm_eo - match[8].rm_so; > - } else if (match[10].rm_so != -1) { > - url = source + offset + match[10].rm_so; > - url_len = match[10].rm_eo - match[10].rm_so; > - } > - assert(url); > - > - url2 = strndup(url, url_len); > - if (!url2) { > - free(res); > - return 0; > - } > - result = url_join(url2, base, (char**)&url); > - free(url2); > - if (result == URL_FUNC_NOMEM) { > - free(res); > - return 0; > - } > - > - /* copy data before match */ > - memcpy(res + *osize, source + offset, match[0].rm_so); > - *osize += match[0].rm_so; > - > - if (result == URL_FUNC_OK) { > - content = save_complete_list_find(url); > - if (content) { > - /* replace import */ > - snprintf(buf, sizeof buf, "@import '%x'", > - (unsigned int) content); > - memcpy(res + *osize, buf, strlen(buf)); > - *osize += 
strlen(buf); > - } else { > - /* copy import */ > - memcpy(res + *osize, source + offset + > match[0].rm_so, > - match[0].rm_eo - match[0].rm_so); > - *osize += match[0].rm_eo - match[0].rm_so; > - } > - } > - else { > - /* copy import */ > - memcpy(res + *osize, source + offset + match[0].rm_so, > - match[0].rm_eo - match[0].rm_so); > - *osize += match[0].rm_eo - match[0].rm_so; > - } > - > - assert(0 < match[0].rm_eo); > - offset += match[0].rm_eo; > - } > - > - /* copy rest of source */ > - if (offset < size) { > - memcpy(res + *osize, source + offset, size - offset); > - *osize += size - offset; > - } > - > - return res; > -} > - > - > -/** > - * Rewrite URLs in a HTML document to be relative. > - * > - * \param doc root of the document tree > - * \param base base url of document > - * \return true on success, false on out of memory > - */ > - > -bool rewrite_document_urls(xmlDoc *doc, const char *base) > -{ > - xmlNode *node; > - > - for (node = doc->children; node; node = node->next) > - if (node->type == XML_ELEMENT_NODE) > - if (!rewrite_urls(node, base)) > - return false; > - > - return true; > -} > - > - > -/** > - * Traverse tree, rewriting URLs as we go. > - * > - * \param n xmlNode of type XML_ELEMENT_NODE to rewrite > - * \param base base url of document > - * \return true on success, false on out of memory > - * > - * URLs in the tree rooted at element n are rewritten. 
> - */ > - > -bool rewrite_urls(xmlNode *n, const char *base) > -{ > - xmlNode *child; > - > - assert(n->type == XML_ELEMENT_NODE); > - > - /** > - * We only need to consider the following cases: > - * > - * Attribute: Elements: > - * > - * 1) data <object> > - * 2) href <a> <area> <link> > - * 3) src <script> <input> <frame> <iframe> <img> > - * 4) n/a <style> > - * 5) n/a any <base> tag > - * 6) background any (except those above) > - */ > - if (!n->name) { > - /* ignore */ > - } > - /* 1 */ > - else if (strcmp(n->name, "object") == 0) { > - if (!rewrite_url(n, "data", base)) > + switch(type) > + { > + case 0xfaf: > + strcpy(deftype,"html"); > + break; > + case 0xf79: > + strcpy(deftype,"css"); > + break; > + default: > + free(finame); > return false; > + break; > } > - /* 2 */ > - else if (strcmp(n->name, "a") == 0 || > - strcmp(n->name, "area") == 0 || > - strcmp(n->name, "link") == 0) { > - if (!rewrite_url(n, "href", base)) > - return false; > - } > - /* 3 */ > - else if (strcmp(n->name, "frame") == 0 || > - strcmp(n->name, "iframe") == 0 || > - strcmp(n->name, "input") == 0 || > - strcmp(n->name, "img") == 0 || > - strcmp(n->name, "script") == 0) { > - if (!rewrite_url(n, "src", base)) > - return false; > - } > - /* 4 */ > - else if (strcmp(n->name, "style") == 0) { > - unsigned int len; > - xmlChar *content; > > - for (child = n->children; child != 0; child = child->next) { > - /* Get current content */ > - content = xmlNodeGetContent(child); > - if (!content) > - /* unfortunately we don't know if this is > - * due to memory exhaustion, or because > - * there is no content for this node */ > - continue; > + dobj = GetIconTags(NULL,ICONGETA_GetDefaultName,deftype, > + > ICONGETA_GetDefaultType,WBPROJECT, > + TAG_DONE); > > - /* Rewrite @import rules */ > - char *rewritten = rewrite_stylesheet_urls( > - content, > - strlen((char*)content), > - &len, base); > - xmlFree(content); > - if (!rewritten) > - return false; > + PutIconTags(finame,dobj, > + 
ICONPUTA_NotifyWorkbench,TRUE, > + TAG_DONE); > > - /* set new content */ > - xmlNodeSetContentLen(child, > - (const xmlChar*)rewritten, > - len); > - } > - > - return true; > - } > - /* 5 */ > - else if (strcmp(n->name, "base") == 0) { > - /* simply remove any <base> tags from the document */ > - xmlUnlinkNode(n); > - xmlFreeNode(n); > - /* base tags have no content, so there's no point recursing > - * additionally, we've just destroyed this node, so trying > - * to recurse would result in bad things happening */ > - return true; > - } > - /* 6 */ > - else { > - if (!rewrite_url(n, "background", base)) > - return false; > - } > - > - /* now recurse */ > - for (child = n->children; child;) { > - /* we must extract the next child now, as if the current > - * child is a <base> element, it will be removed from the > - * tree (see 5, above), thus preventing extraction of the > - * next child */ > - xmlNode *next = child->next; > - if (child->type == XML_ELEMENT_NODE) { > - if (!rewrite_urls(child, base)) > - return false; > - } > - child = next; > - } > - > + free(finame); > return true; > } > - > - > -/** > - * Rewrite an URL in a HTML document. 
> - * > - * \param n The node to modify > - * \param attr The html attribute to modify > - * \param base base url of document > - * \return true on success, false on out of memory > - */ > - > -bool rewrite_url(xmlNode *n, const char *attr, const char *base) > -{ > - char *url, *data; > - char rel[20]; > - struct content *content; > - url_func_result res; > - > - if (!xmlHasProp(n, (const xmlChar *) attr)) > - return true; > - > - data = xmlGetProp(n, (const xmlChar *) attr); > - if (!data) > - return false; > - > - res = url_join(data, base, &url); > - xmlFree(data); > - if (res == URL_FUNC_NOMEM) > - return false; > - else if (res == URL_FUNC_OK) { > - content = save_complete_list_find(url); > - if (content) { > - /* found a match */ > - free(url); > - snprintf(rel, sizeof rel, "%x", > - (unsigned int) content); > - if (!xmlSetProp(n, (const xmlChar *) attr, > - (xmlChar *) rel)) > - return false; > - } else { > - /* no match found */ > - if (!xmlSetProp(n, (const xmlChar *) attr, > - (xmlChar *) url)) { > - free(url); > - return false; > - } > - free(url); > - } > - } > - > - return true; > -} > - > - > -/** > - * Add a content to the save_complete_list. > - * > - * \param content content to add > - * \return true on success, false on out of memory > - */ > - > -bool save_complete_list_add(struct content *content) > -{ > - struct save_complete_entry *entry; > - entry = malloc(sizeof (*entry)); > - if (!entry) > - return false; > - entry->content = content; > - entry->next = save_complete_list; > - save_complete_list = entry; > - return true; > -} > - > - > -/** > - * Look up a url in the save_complete_list. 
> - * > - * \param url url to find > - * \return content if found, 0 otherwise > - */ > - > -struct content * save_complete_list_find(const char *url) > -{ > - struct save_complete_entry *entry; > - for (entry = save_complete_list; entry; entry = entry->next) > - if (strcmp(url, entry->content->url) == 0) > - return entry->content; > - return 0; > -} > - > - > -/** > - * Look up a content in the save_complete_list. > - * > - * \param content pointer to content > - * \return true if the content is in the save_complete_list > - */ > - > -bool save_complete_list_check(struct content *content) > -{ > - struct save_complete_entry *entry; > - for (entry = save_complete_list; entry; entry = entry->next) > - if (entry->content == content) > - return true; > - return false; > -} > - > - > -#if 0 > -/** > - * Dump save complete list to stderr > - */ > -void save_complete_list_dump(void) > -{ > - struct save_complete_entry *entry; > - for (entry = save_complete_list; entry; entry = entry->next) > - fprintf(stderr, "%p : %s\n", entry->content, > - entry->content->url); > -} > -#endif > - > - > -/** > - * Create the inventory file listing original URLs. > - */ > - > -bool save_complete_inventory(const char *path) > -{ > - char spath[256]; > - FILE *fp; > - > - snprintf(spath, sizeof spath, "%s/Inventory", path); > - > - fp = fopen(spath, "w"); > - if (!fp) { > - LOG(("fopen(): errno = %i", errno)); > - warn_user("SaveError", strerror(errno)); > - return false; > - } > - > - struct save_complete_entry *entry; > - for (entry = save_complete_list; entry; entry = entry->next) > - fprintf(fp, "%x %s\n", > - (unsigned int) entry->content, > - entry->content->url); > - > - fclose(fp); > - > - return true; > -} > -
-- Mark http://www.halloit.com Key ID 046B65CF
