Enlightenment CVS committal Author : kiwi Project : e_modules Module : news
Dir : e_modules/news/src/module Modified Files: Makefile.am News_includes.h news_config.c news_config_dialog.c news_feed.c news_feed.h news_parse.c Removed Files: news_parse_atom.c news_parse_atom.h news_parse_private.h news_parse_rss.c news_parse_rss.h Log Message: better gadget look, and some internal changes =================================================================== RCS file: /cvs/e/e_modules/news/src/module/Makefile.am,v retrieving revision 1.2 retrieving revision 1.3 diff -u -3 -r1.2 -r1.3 --- Makefile.am 23 May 2007 23:32:36 -0000 1.2 +++ Makefile.am 29 May 2007 05:57:49 -0000 1.3 @@ -34,11 +34,6 @@ news_menu.h \ news_parse.c \ news_parse.h \ - news_parse_private.h \ - news_parse_atom.c \ - news_parse_atom.h \ - news_parse_rss.c \ - news_parse_rss.h \ news_popup.c \ news_popup.h \ news_theme.c \ =================================================================== RCS file: /cvs/e/e_modules/news/src/module/News_includes.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -3 -r1.2 -r1.3 --- News_includes.h 23 May 2007 23:32:36 -0000 1.2 +++ News_includes.h 29 May 2007 05:57:49 -0000 1.3 @@ -3,8 +3,6 @@ #include "news_item.h" #include "news_feed.h" #include "news_parse.h" -#include "news_parse_rss.h" -#include "news_parse_atom.h" #include "news_viewer.h" #include "news_popup.h" #include "news_menu.h" =================================================================== RCS file: /cvs/e/e_modules/news/src/module/news_config.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -3 -r1.3 -r1.4 --- news_config.c 24 May 2007 11:32:43 -0000 1.3 +++ news_config.c 29 May 2007 05:57:49 -0000 1.4 @@ -166,7 +166,7 @@ E_CONFIG_LIMIT(news->config->feed.timer_m, NEWS_FEED_TIMER_MIN, NEWS_FEED_TIMER_MAX); E_CONFIG_LIMIT(news->config->feed.sort_name, 0, 1); E_CONFIG_LIMIT(news->config->proxy.enable, 0, 1); - E_CONFIG_LIMIT(news->config->proxy.port, 1024, 65535); + E_CONFIG_LIMIT(news->config->proxy.port, 1, 65535); E_CONFIG_LIMIT(news->config->viewer.vfeeds.unread_first, 0, 1); E_CONFIG_LIMIT(news->config->viewer.varticles.unread_first, 0, 1); E_CONFIG_LIMIT(news->config->viewer.varticles.sort_date, 0, 1); =================================================================== RCS file: /cvs/e/e_modules/news/src/module/news_config_dialog.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -3 -r1.4 -r1.5 --- news_config_dialog.c 23 May 2007 23:32:36 -0000 1.4 +++ news_config_dialog.c 29 May 2007 05:57:49 -0000 1.5 @@ -404,6 +404,7 @@ _advanced_apply_data(E_Config_Dialog *cfd, E_Config_Dialog_Data *cfdata) { News_Config *c; + int update_feeds = 0; c = news->config; @@ -416,7 +417,11 @@ news_feed_lists_refresh(1); } - c->proxy.enable = cfdata->proxy.enable; + if (c->proxy.enable != cfdata->proxy.enable) + { + c->proxy.enable = cfdata->proxy.enable; + update_feeds = 1; + } if (c->proxy.host) { evas_stringshare_del(c->proxy.host); @@ -487,6 +492,15 @@ c->popup_other.on_timeout = cfdata->popup_other.on_timeout; c->popup_other.timer_s = cfdata->popup_other.timer_s; + + /* update feeds */ + if (update_feeds) + { + NEWS_FEED_FOREACH_BEG(); + if (_feed->doc) + news_feed_update(_feed); + NEWS_FEED_FOREACH_END(); + } news_config_save(); return 1; =================================================================== RCS file: /cvs/e/e_modules/news/src/module/news_feed.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -3 -r1.3 -r1.4 --- news_feed.c 23 May 2007 23:32:37 -0000 1.3 +++ news_feed.c 29 May 2007 05:57:49 -0000 1.4 @@ -68,7 +68,7 @@ int i; /* create dynamic languages list from static one - we create thislist so its accessible from everywhere */ + we create this list so language are accessible from everywhere */ l = NULL; i = 0; while (_feed_langs[i].key) @@ -81,7 +81,7 @@ } news->langs = l; - /* validate each feed (+ create host and file strings + attach to category) */ + /* validate each feed and create host and file strings + attach to category */ NEWS_FEED_FOREACH_BEG(); if (!news_feed_edit(_feed, (char *)_feed->name, _feed->name_ovrw, =================================================================== RCS file: /cvs/e/e_modules/news/src/module/news_feed.h,v retrieving revision 1.4 retrieving revision 1.5 diff -u -3 -r1.4 -r1.5 --- news_feed.h 23 May 2007 23:32:37 -0000 1.4 +++ news_feed.h 29 May 2007 05:57:49 -0000 1.5 @@ -1,3 +1,4 @@ + #ifdef E_MOD_NEWS_TYPEDEFS typedef struct _News_Feed News_Feed; =================================================================== RCS file: /cvs/e/e_modules/news/src/module/news_parse.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -3 -r1.3 -r1.4 --- news_parse.c 21 May 2007 12:30:41 -0000 1.3 +++ news_parse.c 29 May 2007 05:57:49 -0000 1.4 @@ -1,10 +1,85 @@ #include "News.h" -#include "news_parse_private.h" + +typedef enum News_Parse_Oc + { + NEWS_PARSE_OC_DETECT_METAS, + NEWS_PARSE_OC_INFOS, + NEWS_PARSE_OC_ARTICLE_BEG, + NEWS_PARSE_OC_ITEM_TITLE, + NEWS_PARSE_OC_ITEM_TITLE_CLEAN, + NEWS_PARSE_OC_ITEM_DATE, + NEWS_PARSE_OC_ITEM_IMAGE, + NEWS_PARSE_OC_ITEM_LINK, + NEWS_PARSE_OC_ITEM_LINK_CLEAN, + NEWS_PARSE_OC_ARTICLE_EXISTS, + NEWS_PARSE_OC_ITEM_DESCRIPTION, + NEWS_PARSE_OC_ITEM_DESCRIPTION_CLEAN, + NEWS_PARSE_OC_ARTICLE_END, + NEWS_PARSE_OC_END + } News_Parse_Oc; + +typedef struct _News_Parse_Oc_Actions News_Parse_Oc_Actions; +typedef struct _News_Parse_Article News_Parse_Article; + +struct _News_Parse_Oc_Actions +{ + News_Parse_Oc action; + News_Parse_Oc after; +}; + +struct _News_Parse_Article +{ + News_Feed_Article *article; + + char *pos_end; + + char *title; + char *url; + char *image; + char *description; + struct tm date; +}; + +struct _News_Parse +{ + News_Feed_Document *doc; + void (*cb_func) (News_Feed_Document *doc, News_Parse_Error error, int changes); + char *buffer_pos; + + News_Parse_Oc_Actions oc; + Evas_List *articles; + Ecore_Idler *idler; + News_Parse_Article *art; + int error; + unsigned char changes : 1; +}; static Evas_List *_parsers; -static int _parse_type(News_Feed_Document *doc); -static void _update_doc(News_Parse *parser); +static int _parse_type(News_Feed_Document *doc); + +static int _idler(void *data); +static News_Parse_Error _idler_parse_detect_metas(News_Parse *parser); +static News_Parse_Error _idler_parse_infos(News_Parse *parser); +static News_Parse_Error _idler_parse_article_init(News_Parse *parser); +static News_Parse_Error _idler_parse_article_exists(News_Parse *parser); +static News_Parse_Error _idler_parse_article_end(News_Parse *parser); +static News_Parse_Error _idler_parse_item_title(News_Parse *parser); +static News_Parse_Error _idler_parse_item_date(News_Parse *parser); +static News_Parse_Error _idler_parse_item_image(News_Parse *parser); +static News_Parse_Error _idler_parse_item_link(News_Parse *parser); +static News_Parse_Error _idler_parse_item_description(News_Parse *parser); +static News_Parse_Error _idler_parse_item_clean(News_Parse *parser); + +static char *_meta_block_find(char **buf); +static char *_meta_block_end(char *buf, int beg_before, int end_before); + +static const char *_parse_infos_item(News_Parse *parser, char *meta, const char *prev, int *edit); + +static void _parse_finished(News_Parse *parser); +static void _update_doc(News_Parse *parser); +static void _parse_free(News_Parse *parser); +static void _parse_article_free(News_Parse_Article *a); /* * Public functions @@ -59,13 +134,15 @@ switch (doc->parse.type) { case NEWS_FEED_TYPE_RSS: - news_parse_rss_go(parser); - break; + parser->oc.action = NEWS_PARSE_OC_DETECT_METAS; + parser->idler = ecore_idler_add(_idler, parser); + return; case NEWS_FEED_TYPE_ATOM: - news_parse_atom_go(parser); - break; + parser->error = NEWS_PARSE_ERROR_NOT_IMPLEMENTED; + _parse_finished(parser); + return; case NEWS_FEED_TYPE_UNKNOWN: - break; + return; } } @@ -76,19 +153,720 @@ DPARSE(("Parse STOP")); - news_parse_free(doc->parse.parser); + _parse_free(doc->parse.parser); _parsers = evas_list_remove(_parsers, doc->parse.parser); doc->parse.parser = NULL; } - /* - * Functions avalaible to parse files only + * Private functions */ -char * -news_parse_meta_block_find(char **buf) +static int +_parse_type(News_Feed_Document *doc) +{ + char *buf, *p; + + buf = doc->server.buffer; + + p = strstr(buf, "<rss"); + if (p) + { + p = strstr(p, "version"); + if (!p) return 0; + doc->parse.type = NEWS_FEED_TYPE_RSS; + sscanf(p + 9, "%f", &doc->parse.version); + return 1; + } + + p = strstr(buf, "<rdf:RDF"); + if (p) + { + doc->parse.type = NEWS_FEED_TYPE_RSS; + doc->parse.version = 1.0; + return 1; + } + + p = strstr(buf, "<feed xmlns"); + if (p) + { + doc->parse.type = NEWS_FEED_TYPE_ATOM; + doc->parse.version = 1.0; + return 1; + } + + doc->parse.type = NEWS_FEED_TYPE_UNKNOWN; + doc->parse.version = 0.0; + + return 0; +} + +static int +_idler(void *data) +{ + News_Parse *parser; + News_Parse_Error err; + + parser = data; + + //DD(("%d", parser->oc.action)); + + switch(parser->oc.action) + { + case NEWS_PARSE_OC_DETECT_METAS: + err = _idler_parse_detect_metas(parser); + parser->oc.action = NEWS_PARSE_OC_INFOS; + break; + case NEWS_PARSE_OC_INFOS: + err = _idler_parse_infos(parser); + parser->oc.action = NEWS_PARSE_OC_ARTICLE_BEG; + break; + case NEWS_PARSE_OC_ARTICLE_BEG: + err = _idler_parse_article_init(parser); + if (parser->oc.action == NEWS_PARSE_OC_END) break; + parser->oc.action = NEWS_PARSE_OC_ITEM_TITLE; + break; + case NEWS_PARSE_OC_ARTICLE_END: + err = _idler_parse_article_end(parser); + parser->oc.action = NEWS_PARSE_OC_ARTICLE_BEG; + break; + case NEWS_PARSE_OC_ITEM_TITLE: + err = _idler_parse_item_title(parser); + parser->oc.action = NEWS_PARSE_OC_ITEM_TITLE_CLEAN; + parser->oc.after = NEWS_PARSE_OC_ITEM_DATE; + break; + case NEWS_PARSE_OC_ITEM_DATE: + err = _idler_parse_item_date(parser); + parser->oc.action = NEWS_PARSE_OC_ITEM_LINK; + break; + case NEWS_PARSE_OC_ITEM_LINK: + err = _idler_parse_item_link(parser); + parser->oc.action = NEWS_PARSE_OC_ITEM_LINK_CLEAN; + parser->oc.after = NEWS_PARSE_OC_ARTICLE_EXISTS; + break; + case NEWS_PARSE_OC_ARTICLE_EXISTS: + err = _idler_parse_article_exists(parser); + if (parser->oc.action == NEWS_PARSE_OC_ARTICLE_END) break; + parser->oc.action = NEWS_PARSE_OC_ITEM_IMAGE; + break; + case NEWS_PARSE_OC_ITEM_IMAGE: + err = _idler_parse_item_image(parser); + parser->oc.action = NEWS_PARSE_OC_ITEM_DESCRIPTION; + break; + case NEWS_PARSE_OC_ITEM_DESCRIPTION: + err = _idler_parse_item_description(parser); + parser->oc.action = NEWS_PARSE_OC_ITEM_DESCRIPTION_CLEAN; + parser->oc.after = NEWS_PARSE_OC_ARTICLE_END; + break; + case NEWS_PARSE_OC_ITEM_TITLE_CLEAN: + case NEWS_PARSE_OC_ITEM_LINK_CLEAN: + case NEWS_PARSE_OC_ITEM_DESCRIPTION_CLEAN: + err = _idler_parse_item_clean(parser); + parser->oc.action = parser->oc.after; + break; + case NEWS_PARSE_OC_END: + _parse_finished(parser); + return 0; + } + + /* error returned */ + if (err != NEWS_PARSE_ERROR_NO) + { + parser->error = err; + parser->oc.action = NEWS_PARSE_OC_END; + } + + return 1; +} + +static News_Parse_Error +_idler_parse_detect_metas(News_Parse *parser) +{ + News_Feed *feed; + News_Feed_Document *doc; + float ver; + + doc = parser->doc; + feed = doc->feed; + ver = doc->parse.version; + + /* already done in a previous update ? */ + if (doc->parse.meta_channel) return NEWS_PARSE_ERROR_NO; + + if (ver == 1.0) + { + if (strstr(doc->server.buffer, "<item rdf")) + doc->parse.meta_article = evas_stringshare_add("<item rdf"); + else + doc->parse.meta_article = evas_stringshare_add("<item>"); + + if (strstr(doc->server.buffer, "<channel rdf")) + doc->parse.meta_channel = evas_stringshare_add("<channel rdf"); + else + doc->parse.meta_channel = evas_stringshare_add("<channel>"); + + doc->parse.meta_date = evas_stringshare_add("<dc:date"); + } + else if ((ver > 0.0) && (ver <= 2.0)) + { + doc->parse.meta_article = evas_stringshare_add("<item>"); + doc->parse.meta_channel = evas_stringshare_add("<channel>"); + doc->parse.meta_date = evas_stringshare_add("<pubDate"); + } + + if (!doc->parse.meta_channel) + return NEWS_PARSE_ERROR_TYPE_UNKNOWN; + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_infos(News_Parse *parser) +{ + News_Feed *feed; + const char *name, *language, *description, *url_home, *icon; + int edit = 0; + + /* already done in a previous update ? */ + if (parser->doc->parse.got_infos) return NEWS_PARSE_ERROR_NO; + + feed = parser->doc->feed; + name = feed->name; + language = feed->language; + description = feed->description; + url_home = feed->url_home; + icon = feed->icon; + + if (feed->name_ovrw) + name = _parse_infos_item(parser, "<title", feed->name, &edit); + if (feed->language_ovrw) + language = _parse_infos_item(parser, "<language", feed->language, &edit); + if (feed->description_ovrw) + description = _parse_infos_item(parser, "<description", feed->description, &edit); + if (feed->url_home_ovrw) + url_home = _parse_infos_item(parser, "<link", feed->url_home, &edit); + //TODO handle feed image from server + /*if (feed->icon_ovrw) + icon = _parse_infos_item(parser, "<image", feed->icon, &edit); */ + + if (edit) + { + news_feed_edit(feed, + (char *)name, feed->name_ovrw, + (char *)language, feed->language_ovrw, + (char *)description, feed->description_ovrw, + (char *)url_home, feed->url_home_ovrw, + (char *)feed->url_feed, + (char *)icon, feed->icon_ovrw, + feed->important, + feed->category, + 0); + if (feed->item && feed->item->viewer) + news_viewer_refresh(feed->item->viewer); + news_feed_lists_refresh(1); + } + + parser->doc->parse.got_infos = 1; + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_article_init(News_Parse *parser) +{ + News_Parse_Article *art; + char *pos; + char *p1, *p2; + + pos = parser->buffer_pos; + + if (pos >= (parser->doc->server.buffer + parser->doc->server.buffer_size)) + { + parser->oc.action = NEWS_PARSE_OC_END; + return NEWS_PARSE_ERROR_NO; + } + + p1 = strstr(pos, parser->doc->parse.meta_article); + if (!p1) + { + if (evas_list_count(parser->articles)) + { + parser->oc.action = NEWS_PARSE_OC_END; + return NEWS_PARSE_ERROR_NO; + } + return NEWS_PARSE_ERROR_BROKEN_FEED; + } + p2 = _meta_block_end(p1, 1, 1); + if (!p2) return NEWS_PARSE_ERROR_BROKEN_FEED; + + art = E_NEW(News_Parse_Article, 1); + art->pos_end = p2; + + parser->art = art; + parser->buffer_pos = p1; + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_article_exists(News_Parse *parser) +{ + News_Parse_Article *art; + News_Feed_Article *a; + Evas_List *l; + + art = parser->art; + + if (!parser->doc->articles || + !evas_list_count(parser->doc->articles)) + return NEWS_PARSE_ERROR_NO; + + DPARSE(("ALREADY EXISTS : Go %s %s", + art->url, art->title)); + for (l=parser->doc->articles; l; l=evas_list_next(l)) + { + a = l->data; + + if ( + ( !a->reused ) + && + ( !news_util_datecmp(&art->date, &a->date) ) + && + ( ( (art->title && a->title) && + !strcmp(art->title, a->title) ) || + (!art->title && !a->title) ) + ) + { + art->article = a; + a->reused = 1; + parser->oc.action = NEWS_PARSE_OC_ARTICLE_END; + DPARSE(("ALREADY EXISTS : MATCH !!!")); + return NEWS_PARSE_ERROR_NO; + } + DD(("ALREADY EXISTS : NO Match %s %s", + a->url, a->title)); + } + + DPARSE(("ALREADY_EXISTS : DOES NOT exists")); + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_article_end(News_Parse *parser) +{ + parser->articles = evas_list_append(parser->articles, + parser->art); + parser->buffer_pos = parser->art->pos_end; + DPARSE(("Parse article %s end", parser->art->title)); + parser->art = NULL; + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_item_title(News_Parse *parser) +{ + News_Parse_Article *art; + char *pos; + char *p1, *p2; + int len; + + art = parser->art; + pos = parser->buffer_pos; + + p1 = strstr(pos, "<title"); + if (p1 && (p1 < art->pos_end)) + { + p2 = _meta_block_find(&p1); + if (!p2 || (p2 > art->pos_end)) + return NEWS_PARSE_ERROR_BROKEN_FEED; + len = p2 - p1; + art->title = E_NEW(char, len+1); + memcpy(art->title, p1, len); + art->title[len] = '\0'; + } + else + art->title = strdup("No title"); + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_item_date(News_Parse *parser) +{ + News_Parse_Article *art; + char *pos, *date; + char *p1, *p2; + int len, i; + struct tm *tm; + + art = parser->art; + pos = parser->buffer_pos; + + p1 = strstr(pos, parser->doc->parse.meta_date); + if (p1 && (p1 < art->pos_end)) + { + p2 = _meta_block_find(&p1); + if (!p2 || p2 > art->pos_end) + return NEWS_PARSE_ERROR_BROKEN_FEED; + + len = p2 - p1; + date = p1; + tm = &art->date; + + if ((parser->doc->parse.version == 1.0) && + (len >= 22)) + { + sscanf(date, "%4d", &i); + tm->tm_year = i - 1900; + sscanf(date+5, "%2d", &tm->tm_mon); + tm->tm_mon--; /* tm works with 0-11 */ + sscanf(date+8, "%2d", &tm->tm_mday); + sscanf(date+11, "%2d", &tm->tm_hour); + sscanf(date+14, "%2d", &tm->tm_min); + if (date[16] == ':') /* seconds are precised ? */ + sscanf(date+17, "%2d", &tm->tm_sec); + else + tm->tm_sec = 0; + } + else if (len >= 25) + { + sscanf(date+5, "%2d", &tm->tm_mday); + if (!strncmp(date+8, "Jan", 3)) tm->tm_mon = 0; + else if (!strncmp(date+8, "Feb", 3)) tm->tm_mon = 1; + else if (!strncmp(date+8, "Mar", 3)) tm->tm_mon = 2; + else if (!strncmp(date+8, "Apr", 3)) tm->tm_mon = 3; + else if (!strncmp(date+8, "May", 3)) tm->tm_mon = 4; + else if (!strncmp(date+8, "Jun", 3)) tm->tm_mon = 5; + else if (!strncmp(date+8, "Jul", 3)) tm->tm_mon = 6; + else if (!strncmp(date+8, "Aug", 3)) tm->tm_mon = 7; + else if (!strncmp(date+8, "Sep", 3)) tm->tm_mon = 8; + else if (!strncmp(date+8, "Oct", 3)) tm->tm_mon = 9; + else if (!strncmp(date+8, "Nov", 3)) tm->tm_mon = 10; + else if (!strncmp(date+8, "Dec", 3)) tm->tm_mon = 11; + else tm->tm_mon = 0; + sscanf(date+12, "%4d", &i); + tm->tm_year = i - 1900; + sscanf(date+17, "%2d", &tm->tm_hour); + sscanf(date+20, "%2d", &tm->tm_min); + sscanf(date+23, "%2d", &tm->tm_sec); + } + } + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_item_image(News_Parse *parser) +{ + // TODO parse and ecore_download + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_item_link(News_Parse *parser) +{ + News_Parse_Article *art; + char *pos; + char *p1, *p2; + int len; + + art = parser->art; + pos = parser->buffer_pos; + + p1 = strstr(pos, "<link"); + if (p1 && (p1 < art->pos_end)) + { + p2 = _meta_block_find(&p1); + if (!p2 || (p2 > art->pos_end)) + return NEWS_PARSE_ERROR_BROKEN_FEED; + len = p2 - p1; + art->url = E_NEW(char, len+1); + memcpy(art->url, p1, len); + art->url[len] = '\0'; + } + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_item_description(News_Parse *parser) +{ + News_Parse_Article *art; + char *pos; + char *p1, *p2; + int len; + + art = parser->art; + pos = parser->buffer_pos; + + p1 = strstr(pos, "<description"); + if (p1 && (p1 < art->pos_end)) + { + p2 = _meta_block_find(&p1); + if (!p2 || (p2 > art->pos_end)) + return NEWS_PARSE_ERROR_BROKEN_FEED; + len = p2 - p1; + art->description = E_NEW(char, len+1); + memcpy(art->description, p1, len); + art->description[len] = '\0'; + } + + return NEWS_PARSE_ERROR_NO; +} + +static News_Parse_Error +_idler_parse_item_clean(News_Parse *parser) +{ + char *buf, *p; + char **buf_org; + int size; + int new_size; /* future size, to avoid realloc on each memmove */ + + switch (parser->oc.action) + { + case NEWS_PARSE_OC_ITEM_TITLE_CLEAN: + buf_org = &parser->art->title; + break; + case NEWS_PARSE_OC_ITEM_LINK_CLEAN: + buf_org = &parser->art->url; + break; + case NEWS_PARSE_OC_ITEM_DESCRIPTION_CLEAN: + buf_org = &parser->art->description; + break; + default: + return NEWS_PARSE_ERROR_NO; + } + + buf = *buf_org; + if (!buf) return NEWS_PARSE_ERROR_NO; /* nothing to clean, ok */ + p = buf; + size = strlen(buf) + 1; + new_size = size; + + //FIXME use macros for nicer code + while (*p) + { + //DPARSE(("%d %d ------\n%30.30s", size, new_size, p)); + if (*p == '<') /* meta open, we are going to skip metas inside text */ + { + char *p2; + + /* skip cfdata */ + if (*(p + 1) == '!') + { + if (!strncmp(p, "<![CDATA[", 9)) + { + memmove(p, p + 9, (buf + new_size) - (p + 9)); + new_size -= 9; + } + else + p++; /* next char */ + } + else + { + /* skip normal meta */ + p2 = p; + do + { + p2++; + /* check > (>) */ + if (*p2 == '&') + { + if (!strncmp(p2 + 1, "gt;", 3)) + { + p2 += 3; + break; + } + } + } + while (*p2 && (*p2 != '>')); + if (*p2) + { + memmove(p, p2 + 1, (buf + new_size) - (p2 + 1)); + new_size -= ((p2 + 1) - p); + } + else + { + *p = '\0'; + new_size -= (p2 - p); + } + } + } + else if (*p == ']') /* skip end cfdata */ + { + if (!strncmp(p + 1, "]>", 2)) + { + memmove(p, p + 3, (buf + new_size) - (p + 3)); + new_size -= 3; + } + else + p++; /* next char */ + } + else if (*p == 0xa) /* \n */ + { + if (parser->oc.action == NEWS_PARSE_OC_ITEM_TITLE_CLEAN) + { + /* remove \n when in title */ + memmove(p, p + 1, (buf + new_size) - (p + 1)); + new_size -= 1; + } + else + { + new_size += 3; + if (new_size > size) + { + int diff; + + diff = p - buf; + size = new_size; + buf = realloc(buf, size); + p = buf + diff; + } + DPARSE(("buf %p p %p new_size %d strlen(buf) %d strlen(p) %d\np :\n%30.30s\n", buf, p, new_size, (int)strlen(buf), (int)strlen(p), p)); + memmove(p + 3, p, (buf + (new_size - 3)) - (p)); + *p = '<'; + *(p + 1) = 'b'; + *(p + 2) = 'r'; + *(p + 3) = '>'; + p = p + 4; + } + } + else if (*p == 0x9) /* \t */ + { + if (parser->oc.action == NEWS_PARSE_OC_ITEM_TITLE_CLEAN) + { + /* remove \t when in title */ + memmove(p, p + 1, (buf + new_size) - (p + 1)); + new_size -= 1; + } + else + { + new_size += 4; + if (new_size > size) + { + int diff; + + diff = p - buf; + size = new_size; + buf = realloc(buf, size); + p = buf + diff; + } + memmove(p + 4, p, (buf + (new_size - 4)) - (p)); + *p = '<'; + *(p + 1) = 't'; + *(p + 2) = 'a'; + *(p + 3) = 'b'; + *(p + 4) = '>'; + p = p + 5; + } + } + else if (*p == 0xd) /* \<???> (newline) to nothing */ + { + memmove(p, p + 1, (buf + new_size) - (p + 1)); + new_size -= 1; + } + else if (*p == '&') /* html codes */ + { + char *p_inc; + + p_inc = p + 1; + if (*p_inc == '#') /* html numbers = ascii hexadecimal char */ + { + char b[4]; + unsigned int i; + int len; + char *p2 = p + 2; + + while (*p2 && (*p2 != ';')) + p2++; + p2++; + len = p2 - p; + if ((len <= 3) || (len > 7) || !sscanf(p + 2, "%d", &i)) + p = p_inc; /* next char (invalid html char number) */ + else if ((i == 8216) || (i == 8217)) /* html num ' */ + { + memmove(p_inc, p + 7, (buf + new_size) - (p + 7)); + new_size -= 6; + *p = '\''; + p = p_inc; /* next char */ + } + else if ((i == 171) || (i == 187)) /* html num " */ + { + memmove(p_inc, p + 6, (buf + new_size) - (p + 6)); + new_size -= 5; + *p = '"'; + p = p_inc; /* next char */ + } + else /* html code convertion dec->hex */ + { + snprintf(b, sizeof(b), "%c", i); + memmove(p_inc, p2, (buf + new_size) - (p2)); + new_size -= (p2 - p - 1); + *p = b[0]; + /* stay on the same char, could be the start of meta/html char */ + } + } + else if (!strncmp(p_inc, "lt;", 3)) /* html < */ + { + memmove(p_inc, p + 4, (buf + new_size) - (p + 4)); + new_size -= 3; + *p = '<'; + /* stay on the same char, could be start of a meta */ + } /* > is checked when skipping meta */ + else if ( !strncmp(p_inc, "amp;", 4) && /* html & */ + ( (parser->oc.action != NEWS_PARSE_OC_ITEM_DESCRIPTION_CLEAN) || + (*(p_inc+4) == '#') ) ) /* but if we are going to write on a tb, we must not convert, except if the next char is an html num char (cause we need to parse it next) */ + //FIXME thats a quick fix, must fix evas_object_textblock so it supports & alone + { + memmove(p_inc, p + 5, (buf + new_size) - (p + 5)); + new_size -= 4; + *p = '&'; + /* stay on the same char, could be start of an html char */ + } + else if (!strncmp(p_inc, "quot;", 5)) /* html " */ + { + memmove(p_inc, p + 6, (buf + new_size) - (p + 6)); + new_size -= 5; + *p = '"'; + p = p_inc; /* next char */ + } + else if (!strncmp(p_inc, "apos;", 5)) /* html ' */ + { + memmove(p_inc, p + 6, (buf + new_size) - (p + 6)); + new_size -= 5; + *p = '\''; + p = p_inc; /* next char */ + } + else if (!strncmp(p_inc, "nbsp;", 5)) /* html space */ + { + memmove(p_inc, p + 6, (buf + new_size) - (p + 6)); + new_size -= 5; + *p = ' '; + p = p_inc; /* next char */ + } + else + p = p_inc; /* next char */ + } + else + p++; /* nothing interesting :) next char */ + } + + + DPARSE(("Item clean: size: %d new_size: %d", size, new_size)); + + buf = realloc(buf, new_size); + *buf_org = buf; + + return NEWS_PARSE_ERROR_NO; +} + + +static char * +_meta_block_find(char **buf) { char *p1, *p2; @@ -100,14 +878,14 @@ p1++; *buf = p1; - p2 = news_parse_meta_block_end(*buf, 0, 1); + p2 = _meta_block_end(*buf, 0, 1); if (!p2) return NULL; return p2; } -char * -news_parse_meta_block_end(char *buf, int beg_before, int end_before) +static char * +_meta_block_end(char *buf, int beg_before, int end_before) { char *p; int layer; @@ -195,8 +973,36 @@ return p; } -void -news_parse_finished(News_Parse *parser) +static const char * +_parse_infos_item(News_Parse *parser, char *meta, const char *prev, int *edit) +{ + const char *item; + char *ibeg, *iend; + int ilen; + char buf[8192]; + + item = prev; + ibeg = strstr(parser->buffer_pos, meta); + if ( ibeg && + (ibeg < (parser->doc->server.buffer + parser->doc->server.buffer_size)) && + (iend = _meta_block_find(&ibeg)) ) + { + ilen = iend - ibeg; + if ( !prev || (prev && strncmp(ibeg, prev, ilen)) ) + { + memcpy(buf, ibeg, ilen); + buf[ilen] = '\0'; + item = evas_stringshare_add(buf); + *edit = 1; + } + } + + return item; +} + + +static void +_parse_finished(News_Parse *parser) { News_Feed_Document *doc; @@ -215,82 +1021,7 @@ parser->doc->parse.parser = NULL; _parsers = evas_list_remove(_parsers, parser); - news_parse_free(parser); -} - -void -news_parse_free(News_Parse *parser) -{ - if (parser->doc) parser->doc->parse.parser = NULL; - - while (parser->articles) - { - News_Parse_Article *a; - - a = parser->articles->data; - news_parse_article_free(a); - parser->articles = evas_list_remove_list(parser->articles, - parser->articles); - } - if (parser->idler) ecore_idler_del(parser->idler); - if (parser->art) news_parse_article_free(parser->art); - - free(parser); -} - -void -news_parse_article_free(News_Parse_Article *a) -{ - E_FREE(a->title); - E_FREE(a->url); - //E_FREE(a->image); //TODO - E_FREE(a->description); - - free(a); -} - - -/* - * Private functions - */ - -static int -_parse_type(News_Feed_Document *doc) -{ - char *buf, *p; - - buf = doc->server.buffer; - - p = strstr(buf, "<rss"); - if (p) - { - p = strstr(p, "version"); - if (!p) return 0; - doc->parse.type = NEWS_FEED_TYPE_RSS; - sscanf(p + 9, "%f", &doc->parse.version); - return 1; - } - - p = strstr(buf, "<rdf:RDF"); - if (p) - { - doc->parse.type = NEWS_FEED_TYPE_RSS; - doc->parse.version = 1.0; - return 1; - } - - p = strstr(buf, "<feed xmlns"); - if (p) - { - doc->parse.type = NEWS_FEED_TYPE_ATOM; - doc->parse.version = 1.0; - return 1; - } - - doc->parse.type = NEWS_FEED_TYPE_UNKNOWN; - doc->parse.version = 0.0; - - return 0; + _parse_free(parser); } static void @@ -338,7 +1069,7 @@ fa->reused = 0; parser->doc->articles = evas_list_remove(parser->doc->articles, fa); } - news_parse_article_free(pa); + _parse_article_free(pa); list = evas_list_append(list, fa); } @@ -382,4 +1113,35 @@ FIXME: if sorting, no need to KEEP THE ORDER upper, so can optimize */ +} + +static void +_parse_free(News_Parse *parser) +{ + if (parser->doc) parser->doc->parse.parser = NULL; + + while (parser->articles) + { + News_Parse_Article *a; + + a = parser->articles->data; + _parse_article_free(a); + parser->articles = evas_list_remove_list(parser->articles, + parser->articles); + } + if (parser->idler) ecore_idler_del(parser->idler); + if (parser->art) _parse_article_free(parser->art); + + free(parser); +} + +static void +_parse_article_free(News_Parse_Article *a) +{ + E_FREE(a->title); + E_FREE(a->url); + //E_FREE(a->image); //TODO + E_FREE(a->description); + + free(a); } ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ enlightenment-cvs mailing list enlightenment-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/enlightenment-cvs