Hello, just some fixes. Changes (most of them taken from Debian [1]): - close every file right after processing it, not at the end of the program. - remove the limited built-in HTTP support. - support UTF-8 encoding when processing input. - don't use backspaces. - skip numbers in HTML tags. - recognize all <meta> tags, not just one. - recode input according to 'meta http-equiv' in the HTML document (needs iconv). - convert output to the user's locale charset (needs iconv). - correctly specify NULLs for 64-bit architectures. - substitute 'char*' with 'const char*' where needed to avoid 'deprecated conversion from string constant to ‘char*’' warnings. - validate the --width parameter input. - fix the CXX variable in configure, so it should now compile with clang.
For the charset conversion I added iconv to WANTLIB; maybe we could create a FLAVOR for it, but I think a hard dependency should be acceptable for everyday usage. Two questions: - How can I test the port with clang/llvm? Is there some handy mk.conf variable for that? - It would be nice to document -utf8 in the man page: do I need to edit it by hand with mg/vi, or is there another tool for that? Tested on amd64. Thanks, /davide [1] http://sources.debian.net/patches/html2text/1.3.2a-18/
diff -Nrua -x CVS /usr/ports/converters/html2text/Makefile ./Makefile --- /usr/ports/converters/html2text/Makefile Sat Apr 25 23:58:01 2015 +++ ./Makefile Sat Jan 21 21:48:09 2017 @@ -5,16 +5,16 @@ COMMENT= advanced HTML-to-text converter DISTNAME= html2text-1.3.2a -REVISION= 1 +REVISION= 2 CATEGORIES= converters textproc - HOMEPAGE= http://www.mbayer.de/html2text/ -# GPL +# GPLv2 PERMIT_PACKAGE_CDROM= Yes -WANTLIB += c m stdc++ +WANTLIB += c m stdc++ iconv +LIB_DEPENDS += converters/libiconv MASTER_SITES= http://www.mbayer.de/html2text/downloads/ \ ftp://ftp.ibiblio.org/pub/linux/apps/www/converters/ @@ -23,7 +23,9 @@ MAKE_FLAGS= PREFIX="${PREFIX}" \ CXXFLAGS="-DVERSION=1.3.2a -DAUTO_PTR_BROKEN ${CXXFLAGS}" \ - CXX="${CXX}" + CXXFLAGS+="-I${LOCALBASE}/include" \ + CXX="${CXX}" \ + LDFLAGS="-L${LOCALBASE}/lib -liconv" NO_TEST= Yes diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-Area_C ./patches/patch-Area_C --- /usr/ports/converters/html2text/patches/patch-Area_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-Area_C Sat Jan 21 20:58:10 2017 @@ -0,0 +1,109 @@ +$OpenBSD$ +--- Area.C.orig Sun Nov 23 12:05:29 2003 ++++ Area.C Sat Jan 21 20:57:57 2017 +@@ -36,10 +36,13 @@ + #include <iostream> + + #include "Area.h" ++#include "html.h" + #include "string.h" + + #define LATIN1_nbsp 160 + ++extern int use_encoding; ++ + /* ------------------------------------------------------------------------- */ + + #define malloc_array(type, size)\ +@@ -81,6 +84,53 @@ Line::~Line() + + /* ------------------------------------------------------------------------- */ + ++/* utf_length() and utf_width() ++ * ++ * Very simplified algorithm of calculating length of UTF-8 ++ * string. No check for errors. Counting only ASCII bytes and ++ * leading bytes of UTF-8 multibyte sequences. All bytes like ++ * 10xxxxxx are dropped. If USE_UTF8 is false then returns ++ * usual length. 
--YS ++ */ ++ ++size_t utf8_aux_count(char ch) ++{ ++ if((ch & 0xe0) == 0xc0) ++ { ++ return 1; ++ } ++ else if((ch & 0xf0) == 0xe0) ++ { ++ return 2; ++ } ++ else if ((ch & 0xf8) == 0xf0) ++ { ++ return 3; ++ } ++ else ++ { ++ return 0; ++ } ++} ++ ++unsigned int ++Line::utf_length(size_type f, size_type t) const ++{ ++ size_type m = (t < length_ ? t : length_); ++ size_type r = m - f; ++ if(USE_UTF8) ++ { ++ for (int i = f; i < m; i++) ++ { ++ char& ch = cells_[i].character; ++ size_type aux_count = utf8_aux_count(ch); ++ r -= aux_count; ++ i += aux_count; ++ } ++ } ++ return r; ++} ++ + void + Line::resize(size_type l) + { +@@ -236,6 +286,28 @@ Area::operator>>=(size_type rs) + return *this; + } + ++unsigned int ++Area::utf_width() ++{ ++ size_type r = width_; ++ if(USE_UTF8) { r = 0; ++ for (size_type yy = 0; yy < height_; yy++) { ++ int i = width_ - 1; ++ while((i >= 0) && isspace(cells_[yy][i].character)) ++ { ++ --i; ++ } ++ size_type aux_count_sum = 0; ++ for (; i >= 0; i--) { ++ aux_count_sum += utf8_aux_count(cells_[yy][i].character); ++ } ++ size_type r1 = width_ - aux_count_sum; ++ if(r < r1) r = r1; ++ } ++ } ++ return r; ++} ++ + void + Area::resize(size_type w, size_type h) + { +@@ -439,7 +511,7 @@ operator<<(ostream &os, const Area &a) + char c = p->character; + char a = p->attribute; + +- if (c == (char) LATIN1_nbsp) c = ' '; ++ if (c == (char) LATIN1_nbsp && !USE_UTF8) c = ' '; + + if (a == Cell::NONE) { + os << c; diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-Area_h ./patches/patch-Area_h --- /usr/ports/converters/html2text/patches/patch-Area_h Thu Jan 1 01:00:00 1970 +++ ./patches/patch-Area_h Sat Jan 21 20:58:10 2017 @@ -0,0 +1,21 @@ +$OpenBSD$ +--- Area.h.orig Sun Nov 23 12:05:29 2003 ++++ Area.h Sat Jan 21 20:57:57 2017 +@@ -81,6 +81,8 @@ class Line { (public) + Cell &operator[](size_type x) { return cells_[x]; } + const Cell *cells() const { return cells_; } + ++ unsigned int utf_length(size_type f, size_type t) const; ++ + 
void resize(size_type l); + void enlarge(size_type l) { if (l > length_) resize(l); } + +@@ -133,6 +135,8 @@ class Area { (public) + const Cell *operator[](size_type y) const { return cells_[y]; } + Cell *operator[](size_type y) { return cells_[y]; } + const Area &operator>>=(size_type rs); ++ ++ unsigned int utf_width(); + + void resize(size_type w, size_type h); + void enlarge(size_type w, size_type h); diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-HTMLControl_C ./patches/patch-HTMLControl_C --- /usr/ports/converters/html2text/patches/patch-HTMLControl_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-HTMLControl_C Sat Jan 21 20:58:10 2017 @@ -0,0 +1,12 @@ +$OpenBSD$ +--- HTMLControl.C.orig Sun Nov 23 12:05:29 2003 ++++ HTMLControl.C Sat Jan 21 20:57:57 2017 +@@ -372,7 +372,7 @@ HTMLControl::yylex2(yy_HTMLParser_stype *value_return, + attribute.first = c; + for (;;) { + c = get_char(); +- if (!isalpha(c) && c != '-' && c != '_' && c != ':') break; ++ if (!isalnum(c) && c != '-' && c != '_' && c != ':') break; + // Same as in line 352 - Arno + attribute.first += c; + } diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-HTMLControl_h ./patches/patch-HTMLControl_h --- /usr/ports/converters/html2text/patches/patch-HTMLControl_h Thu Jan 1 01:00:00 1970 +++ ./patches/patch-HTMLControl_h Sat Jan 21 20:58:10 2017 @@ -0,0 +1,29 @@ +$OpenBSD$ +--- HTMLControl.h.orig Sun Nov 23 12:05:29 2003 ++++ HTMLControl.h Sat Jan 21 20:57:57 2017 +@@ -38,7 +38,6 @@ + /* ------------------------------------------------------------------------- */ + + #include "HTMLParser.h" +-#include "urlistream.h" + #include <istream> + + using std::istream; +@@ -48,7 +47,7 @@ using std::istream; + class HTMLControl : public HTMLParser { + + public: +- HTMLControl(urlistream &is_, bool debug_scanner_, bool debug_parser_) : ++ HTMLControl(istream &is_, bool debug_scanner_, bool debug_parser_) : + HTMLParser(), + current_line(1), + current_column(0), +@@ -84,7 +83,7 @@ class 
HTMLControl : public HTMLParser { (private) + + bool debug_scanner; + +- urlistream &is; ++ istream &is; + int ungotten_chars[5]; + int number_of_ungotten_chars; + }; diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-HTMLParser_C ./patches/patch-HTMLParser_C --- /usr/ports/converters/html2text/patches/patch-HTMLParser_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-HTMLParser_C Sat Jan 21 20:58:10 2017 @@ -0,0 +1,1090 @@ +$OpenBSD$ +--- HTMLParser.C.orig Sun Nov 23 12:05:29 2003 ++++ HTMLParser.C Sat Jan 21 20:57:57 2017 +@@ -600,7 +600,7 @@ static const int END_VAR; + /* decl const */ + public: + int YY_HTMLParser_PARSE (YY_HTMLParser_PARSE_PARAM); +- virtual void YY_HTMLParser_ERROR(char *msg) YY_HTMLParser_ERROR_BODY; ++ virtual void YY_HTMLParser_ERROR(const char *msg) YY_HTMLParser_ERROR_BODY; + #ifdef YY_HTMLParser_PURE + #ifdef YY_HTMLParser_LSP_NEEDED + virtual int YY_HTMLParser_LEX (YY_HTMLParser_STYPE *YY_HTMLParser_LVAL,YY_HTMLParser_LTYPE *YY_HTMLParser_LLOC) YY_HTMLParser_LEX_BODY; +@@ -911,28 +911,28 @@ static const short yyrhs[] = { 132, + #if YY_HTMLParser_DEBUG != 0 + static const short yyrline[] = { 0, + 273, 304, 309, 312, 315, 319, 322, 326, 329, 333, +- 336, 339, 342, 345, 353, 361, 365, 368, 373, 376, +- 379, 384, 392, 396, 399, 407, 415, 420, 423, 426, +- 431, 442, 446, 454, 458, 461, 466, 471, 475, 478, +- 481, 487, 493, 499, 505, 510, 519, 520, 527, 527, +- 534, 534, 541, 541, 550, 554, 557, 563, 570, 575, +- 582, 591, 600, 604, 607, 611, 617, 623, 631, 637, +- 645, 650, 653, 658, 662, 665, 670, 678, 686, 690, +- 693, 701, 705, 708, 714, 721, 727, 737, 742, 747, +- 749, 750, 751, 752, 753, 761, 763, 764, 765, 766, +- 767, 768, 769, 770, 773, 775, 776, 777, 778, 779, +- 780, 781, 784, 795, 800, 808, 814, 819, 824, 832, +- 836, 840, 848, 852, 855, 861, 867, 873, 881, 886, +- 891, 901, 903, 904, 905, 906, 907, 910, 912, 913, +- 914, 915, 916, 921, 921, 922, 922, 923, 923, 924, +- 924, 926, 926, 927, 927, 929, 929, 930, 930, 931, +- 
931, 932, 932, 933, 933, 934, 934, 935, 935, 936, +- 936, 937, 937, 938, 938, 939, 939, 940, 940, 941, +- 941, 942, 942, 943, 943, 944, 944, 945, 945, 946, +- 946, 947, 947, 948, 948, 949, 949, 950, 950, 951, +- 951, 952, 952, 953, 953, 954, 954, 955, 955, 956, +- 956, 957, 957, 958, 958, 959, 959, 960, 960, 961, +- 961, 963, 963 ++ 336, 339, 344, 347, 355, 363, 367, 370, 375, 378, ++ 381, 386, 394, 398, 401, 409, 417, 422, 425, 428, ++ 433, 444, 448, 456, 460, 463, 468, 473, 477, 480, ++ 483, 489, 495, 501, 507, 512, 521, 522, 529, 529, ++ 536, 536, 543, 543, 552, 556, 559, 565, 572, 577, ++ 584, 593, 602, 606, 609, 613, 619, 625, 633, 639, ++ 647, 652, 655, 660, 664, 667, 672, 680, 688, 692, ++ 695, 703, 707, 710, 716, 723, 729, 739, 744, 749, ++ 751, 752, 753, 754, 755, 763, 765, 766, 767, 768, ++ 769, 770, 771, 772, 775, 777, 778, 779, 780, 781, ++ 782, 783, 786, 797, 802, 810, 816, 821, 826, 834, ++ 838, 842, 850, 854, 857, 863, 869, 875, 883, 888, ++ 893, 903, 905, 906, 907, 908, 909, 912, 914, 915, ++ 916, 917, 918, 923, 923, 924, 924, 925, 925, 926, ++ 926, 928, 928, 929, 929, 931, 931, 932, 932, 933, ++ 933, 934, 934, 935, 935, 936, 936, 937, 937, 938, ++ 938, 939, 939, 940, 940, 941, 941, 942, 942, 943, ++ 943, 944, 944, 945, 945, 946, 946, 947, 947, 948, ++ 948, 949, 949, 950, 950, 951, 951, 952, 952, 953, ++ 953, 954, 954, 955, 955, 956, 956, 957, 957, 958, ++ 958, 959, 959, 960, 960, 961, 961, 962, 962, 963, ++ 963, 965, 965 + }; + + static const char * const yytname[] = { "$","error","$illegal.","DOCTYPE", +@@ -2044,17 +2044,19 @@ case 11: + case 12: + #line 339 "HTMLParser.y" + { +- (yyval.document = yyvsp[-1].document)->head.meta_attributes.reset(yyvsp[0].tag_attributes); ++ auto_ptr<Meta> s(new Meta); ++ s->attributes.reset(yyvsp[0].tag_attributes); ++ (yyval.document = yyvsp[-1].document)->head.metas.push_back(s); + ; + break;} + case 13: +-#line 342 "HTMLParser.y" ++#line 344 "HTMLParser.y" + { + (yyval.document = 
yyvsp[-1].document)->head.link_attributes.reset(yyvsp[0].tag_attributes); + ; + break;} + case 14: +-#line 345 "HTMLParser.y" ++#line 347 "HTMLParser.y" + { + auto_ptr<Script> s(new Script); + s->attributes.reset(yyvsp[0].tag_attributes); +@@ -2065,7 +2067,7 @@ case 14: + ; + break;} + case 15: +-#line 353 "HTMLParser.y" ++#line 355 "HTMLParser.y" + { + auto_ptr<Style> s(new Style); + s->attributes.reset(yyvsp[0].tag_attributes); +@@ -2076,20 +2078,20 @@ case 15: + ; + break;} + case 16: +-#line 361 "HTMLParser.y" ++#line 363 "HTMLParser.y" + { + delete yyvsp[0].tag_attributes; + yyval.document = yyvsp[-1].document; + ; + break;} + case 17: +-#line 365 "HTMLParser.y" ++#line 367 "HTMLParser.y" + { + yyval.document = yyvsp[-1].document; + ; + break;} + case 18: +-#line 368 "HTMLParser.y" ++#line 370 "HTMLParser.y" + { + Paragraph *p = new Paragraph; + p->texts.reset(yyvsp[0].element_list); +@@ -2097,25 +2099,25 @@ case 18: + ; + break;} + case 19: +-#line 373 "HTMLParser.y" ++#line 375 "HTMLParser.y" + { + (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].heading)); + ; + break;} + case 20: +-#line 376 "HTMLParser.y" ++#line 378 "HTMLParser.y" + { + (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 21: +-#line 379 "HTMLParser.y" ++#line 381 "HTMLParser.y" + { + (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].address)); + ; + break;} + case 22: +-#line 385 "HTMLParser.y" ++#line 387 "HTMLParser.y" + { + yyval.pcdata = new PCData; + yyval.pcdata->text = *yyvsp[0].strinG; +@@ -2123,19 +2125,19 @@ case 22: + ; + break;} + case 23: +-#line 393 "HTMLParser.y" ++#line 395 "HTMLParser.y" + { + yyval.element_list = new list<auto_ptr<Element> >; + ; + break;} + case 24: +-#line 396 "HTMLParser.y" ++#line 398 "HTMLParser.y" + { + yyval.element_list = yyvsp[-1].element_list; + ; + break;} + case 25: +-#line 399 "HTMLParser.y" 
++#line 401 "HTMLParser.y" + { + auto_ptr<Script> s(new Script); + s->attributes.reset(yyvsp[0].tag_attributes); +@@ -2146,7 +2148,7 @@ case 25: + ; + break;} + case 26: +-#line 407 "HTMLParser.y" ++#line 409 "HTMLParser.y" + { + auto_ptr<Style> s(new Style); + s->attributes.reset(yyvsp[0].tag_attributes); +@@ -2157,7 +2159,7 @@ case 26: + ; + break;} + case 27: +-#line 415 "HTMLParser.y" ++#line 417 "HTMLParser.y" + { + Paragraph *p = new Paragraph; + p->texts = auto_ptr<list<auto_ptr<Element> > >(yyvsp[0].element_list); +@@ -2165,25 +2167,25 @@ case 27: + ; + break;} + case 28: +-#line 420 "HTMLParser.y" ++#line 422 "HTMLParser.y" + { + (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].heading)); + ; + break;} + case 29: +-#line 423 "HTMLParser.y" ++#line 425 "HTMLParser.y" + { + (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 30: +-#line 426 "HTMLParser.y" ++#line 428 "HTMLParser.y" + { + (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].address)); + ; + break;} + case 31: +-#line 432 "HTMLParser.y" ++#line 434 "HTMLParser.y" + { + /* EXTENSION: Allow paragraph content in heading, not only texts */ + if (yyvsp[-2].heading->level != yyvsp[0].inT) { +@@ -2194,13 +2196,13 @@ case 31: + ; + break;} + case 32: +-#line 443 "HTMLParser.y" ++#line 445 "HTMLParser.y" + { + yyval.element = yyvsp[0].element; + ; + break;} + case 33: +-#line 446 "HTMLParser.y" ++#line 448 "HTMLParser.y" + { + Paragraph *p = new Paragraph; + p->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2209,19 +2211,19 @@ case 33: + ; + break;} + case 34: +-#line 455 "HTMLParser.y" ++#line 457 "HTMLParser.y" + { + yyval.element_list = new list<auto_ptr<Element> >; + ; + break;} + case 35: +-#line 458 "HTMLParser.y" ++#line 460 "HTMLParser.y" + { + yyval.element_list = yyvsp[-1].element_list; + ; + break;} + case 36: +-#line 461 "HTMLParser.y" ++#line 463 
"HTMLParser.y" + { + yyval.element_list = yyvsp[-1].element_list; + yyval.element_list->splice(yyval.element_list->end(), *yyvsp[0].element_list); +@@ -2229,31 +2231,31 @@ case 36: + ; + break;} + case 37: +-#line 466 "HTMLParser.y" ++#line 468 "HTMLParser.y" + { + (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 38: +-#line 472 "HTMLParser.y" ++#line 474 "HTMLParser.y" + { + yyval.element = yyvsp[0].element; + ; + break;} + case 39: +-#line 475 "HTMLParser.y" ++#line 477 "HTMLParser.y" + { + yyval.element = yyvsp[0].preformatted; + ; + break;} + case 40: +-#line 478 "HTMLParser.y" ++#line 480 "HTMLParser.y" + { + yyval.element = yyvsp[0].definition_list; + ; + break;} + case 41: +-#line 481 "HTMLParser.y" ++#line 483 "HTMLParser.y" + { + Division *p = new Division; + p->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2262,7 +2264,7 @@ case 41: + ; + break;} + case 42: +-#line 487 "HTMLParser.y" ++#line 489 "HTMLParser.y" + { + Center *p = new Center; + delete yyvsp[-2].tag_attributes; // CENTER has no attributes. +@@ -2271,7 +2273,7 @@ case 42: + ; + break;} + case 43: +-#line 493 "HTMLParser.y" ++#line 495 "HTMLParser.y" + { + delete yyvsp[-2].tag_attributes; // BLOCKQUOTE has no attributes! 
+ BlockQuote *bq = new BlockQuote; +@@ -2280,7 +2282,7 @@ case 43: + ; + break;} + case 44: +-#line 499 "HTMLParser.y" ++#line 501 "HTMLParser.y" + { + Form *f = new Form; + f->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2289,7 +2291,7 @@ case 44: + ; + break;} + case 45: +-#line 505 "HTMLParser.y" ++#line 507 "HTMLParser.y" + { + HorizontalRule *h = new HorizontalRule; + h->attributes.reset(yyvsp[0].tag_attributes); +@@ -2297,7 +2299,7 @@ case 45: + ; + break;} + case 46: +-#line 510 "HTMLParser.y" ++#line 512 "HTMLParser.y" + { + Table *t = new Table; + t->attributes.reset(yyvsp[-3].tag_attributes); +@@ -2307,11 +2309,11 @@ case 46: + ; + break;} + case 47: +-#line 520 "HTMLParser.y" ++#line 522 "HTMLParser.y" + { ++list_nesting; ; + break;} + case 48: +-#line 520 "HTMLParser.y" ++#line 522 "HTMLParser.y" + { + OrderedList *ol = new OrderedList; + ol->attributes.reset(yyvsp[-3].tag_attributes); +@@ -2321,11 +2323,11 @@ case 48: + ; + break;} + case 49: +-#line 527 "HTMLParser.y" ++#line 529 "HTMLParser.y" + { ++list_nesting; ; + break;} + case 50: +-#line 527 "HTMLParser.y" ++#line 529 "HTMLParser.y" + { + UnorderedList *ul = new UnorderedList; + ul->attributes.reset(yyvsp[-3].tag_attributes); +@@ -2335,11 +2337,11 @@ case 50: + ; + break;} + case 51: +-#line 534 "HTMLParser.y" ++#line 536 "HTMLParser.y" + { ++list_nesting; ; + break;} + case 52: +-#line 534 "HTMLParser.y" ++#line 536 "HTMLParser.y" + { + Dir *d = new Dir; + d->attributes.reset(yyvsp[-3].tag_attributes); +@@ -2349,11 +2351,11 @@ case 52: + ; + break;} + case 53: +-#line 541 "HTMLParser.y" ++#line 543 "HTMLParser.y" + { ++list_nesting; ; + break;} + case 54: +-#line 541 "HTMLParser.y" ++#line 543 "HTMLParser.y" + { + Menu *m = new Menu; + m->attributes.reset(yyvsp[-3].tag_attributes); +@@ -2363,26 +2365,26 @@ case 54: + ; + break;} + case 55: +-#line 551 "HTMLParser.y" ++#line 553 "HTMLParser.y" + { + yyval.list_items = 0; + ; + break;} + case 56: +-#line 554 "HTMLParser.y" ++#line 556 
"HTMLParser.y" + { + yyval.list_items = yyvsp[-1].list_items; + ; + break;} + case 57: +-#line 557 "HTMLParser.y" ++#line 559 "HTMLParser.y" + { + yyval.list_items = yyvsp[-1].list_items ? yyvsp[-1].list_items : new list<auto_ptr<ListItem> >; + yyval.list_items->push_back(auto_ptr<ListItem>(yyvsp[0].list_item)); + ; + break;} + case 58: +-#line 564 "HTMLParser.y" ++#line 566 "HTMLParser.y" + { + ListNormalItem *lni = new ListNormalItem; + lni->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2391,7 +2393,7 @@ case 58: + ; + break;} + case 59: +-#line 570 "HTMLParser.y" ++#line 572 "HTMLParser.y" + { /* EXTENSION: Handle a "block" in a list as an indented block. */ + ListBlockItem *lbi = new ListBlockItem; + lbi->block.reset(yyvsp[0].element); +@@ -2399,7 +2401,7 @@ case 59: + ; + break;} + case 60: +-#line 575 "HTMLParser.y" ++#line 577 "HTMLParser.y" + { /* EXTENSION: Treat "texts" in a list as an "<LI>". */ + ListNormalItem *lni = new ListNormalItem; + lni->flow.reset(yyvsp[0].element_list); +@@ -2407,7 +2409,7 @@ case 60: + ; + break;} + case 61: +-#line 585 "HTMLParser.y" ++#line 587 "HTMLParser.y" + { + delete yyvsp[-4].tag_attributes; + delete yyvsp[-3].element_list; /* Kludge */ +@@ -2415,7 +2417,7 @@ case 61: + ; + break;} + case 62: +-#line 591 "HTMLParser.y" ++#line 593 "HTMLParser.y" + { + DefinitionList *dl = new DefinitionList; + dl->attributes.reset(yyvsp[-4].tag_attributes); +@@ -2425,33 +2427,33 @@ case 62: + ; + break;} + case 63: +-#line 601 "HTMLParser.y" ++#line 603 "HTMLParser.y" + { + yyval.definition_list_item_list = 0; + ; + break;} + case 64: +-#line 604 "HTMLParser.y" ++#line 606 "HTMLParser.y" + { + yyval.definition_list_item_list = yyvsp[0].definition_list_item_list; + ; + break;} + case 65: +-#line 607 "HTMLParser.y" ++#line 609 "HTMLParser.y" + { + yyval.definition_list_item_list = yyvsp[-1].definition_list_item_list ? 
yyvsp[-1].definition_list_item_list : new list<auto_ptr<DefinitionListItem> >; + yyval.definition_list_item_list->push_back(auto_ptr<DefinitionListItem>(yyvsp[0].term_name)); + ; + break;} + case 66: +-#line 611 "HTMLParser.y" ++#line 613 "HTMLParser.y" + { + yyval.definition_list_item_list = yyvsp[-1].definition_list_item_list ? yyvsp[-1].definition_list_item_list : new list<auto_ptr<DefinitionListItem> >; + yyval.definition_list_item_list->push_back(auto_ptr<DefinitionListItem>(yyvsp[0].term_definition)); + ; + break;} + case 67: +-#line 618 "HTMLParser.y" ++#line 620 "HTMLParser.y" + { /* EXTENSION: Allow "flow" instead of "texts" */ + delete yyvsp[-2].tag_attributes; + yyval.term_name = new TermName; +@@ -2459,7 +2461,7 @@ case 67: + ; + break;} + case 68: +-#line 623 "HTMLParser.y" ++#line 625 "HTMLParser.y" + {/* EXTENSION: Ignore <P> after </DT> */ + delete yyvsp[-4].tag_attributes; + delete yyvsp[-1].tag_attributes; +@@ -2468,7 +2470,7 @@ case 68: + ; + break;} + case 69: +-#line 632 "HTMLParser.y" ++#line 634 "HTMLParser.y" + { + delete yyvsp[-2].tag_attributes; + yyval.term_definition = new TermDefinition; +@@ -2476,7 +2478,7 @@ case 69: + ; + break;} + case 70: +-#line 637 "HTMLParser.y" ++#line 639 "HTMLParser.y" + {/* EXTENSION: Ignore <P> after </DD> */ + delete yyvsp[-4].tag_attributes; + delete yyvsp[-1].tag_attributes; +@@ -2485,44 +2487,44 @@ case 70: + ; + break;} + case 71: +-#line 646 "HTMLParser.y" ++#line 648 "HTMLParser.y" + { + yyval.element_list = new list<auto_ptr<Element> >; + yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 72: +-#line 650 "HTMLParser.y" ++#line 652 "HTMLParser.y" + { + yyval.element_list = yyvsp[-1].element_list; + ; + break;} + case 73: +-#line 653 "HTMLParser.y" ++#line 655 "HTMLParser.y" + { + (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 74: +-#line 659 "HTMLParser.y" ++#line 661 "HTMLParser.y" + { + 
yyval.element = yyvsp[0].element; + ; + break;} + case 75: +-#line 662 "HTMLParser.y" ++#line 664 "HTMLParser.y" + { /* EXTENSION: Allow headings in "flow", i.e. in lists */ + yyval.element = yyvsp[0].heading; + ; + break;} + case 76: +-#line 665 "HTMLParser.y" ++#line 667 "HTMLParser.y" + { + yyval.element = yyvsp[0].element; + ; + break;} + case 77: +-#line 671 "HTMLParser.y" ++#line 673 "HTMLParser.y" + { + yyval.preformatted = new Preformatted; + yyval.preformatted->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2530,7 +2532,7 @@ case 77: + ; + break;} + case 78: +-#line 679 "HTMLParser.y" ++#line 681 "HTMLParser.y" + { + yyval.caption = new Caption; + yyval.caption->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2538,19 +2540,19 @@ case 78: + ; + break;} + case 79: +-#line 687 "HTMLParser.y" ++#line 689 "HTMLParser.y" + { + yyval.table_rows = new list<auto_ptr<TableRow> >; + ; + break;} + case 80: +-#line 690 "HTMLParser.y" ++#line 692 "HTMLParser.y" + { + yyval.table_rows = yyvsp[-1].table_rows; + ; + break;} + case 81: +-#line 693 "HTMLParser.y" ++#line 695 "HTMLParser.y" + { + TableRow *tr = new TableRow; + tr->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2559,19 +2561,19 @@ case 81: + ; + break;} + case 82: +-#line 702 "HTMLParser.y" ++#line 704 "HTMLParser.y" + { + yyval.table_cells = new list<auto_ptr<TableCell> >; + ; + break;} + case 83: +-#line 705 "HTMLParser.y" ++#line 707 "HTMLParser.y" + { + yyval.table_cells = yyvsp[-1].table_cells; + ; + break;} + case 84: +-#line 708 "HTMLParser.y" ++#line 710 "HTMLParser.y" + { + TableCell *tc = new TableCell; + tc->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2580,7 +2582,7 @@ case 84: + ; + break;} + case 85: +-#line 714 "HTMLParser.y" ++#line 716 "HTMLParser.y" + { + /* EXTENSION: Allow "</TD>" in place of "</TH>". 
*/ + TableHeadingCell *thc = new TableHeadingCell; +@@ -2590,14 +2592,14 @@ case 85: + ; + break;} + case 86: +-#line 721 "HTMLParser.y" ++#line 723 "HTMLParser.y" + { /* EXTENSION: Ignore <INPUT> between table cells. */ + delete yyvsp[0].tag_attributes; + yyval.table_cells = yyvsp[-1].table_cells; + ; + break;} + case 87: +-#line 728 "HTMLParser.y" ++#line 730 "HTMLParser.y" + { /* Should be "address_content"... */ + delete yyvsp[-2].tag_attributes; + yyval.address = new Address; +@@ -2605,40 +2607,40 @@ case 87: + ; + break;} + case 88: +-#line 738 "HTMLParser.y" ++#line 740 "HTMLParser.y" + { + yyval.element_list = new list<auto_ptr<Element> >; + yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 89: +-#line 742 "HTMLParser.y" ++#line 744 "HTMLParser.y" + { + (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 90: +-#line 748 "HTMLParser.y" ++#line 750 "HTMLParser.y" + { yyval.element = yyvsp[-1].pcdata; ; + break;} + case 91: +-#line 749 "HTMLParser.y" ++#line 751 "HTMLParser.y" + { yyval.element = yyvsp[-1].element; ; + break;} + case 92: +-#line 750 "HTMLParser.y" ++#line 752 "HTMLParser.y" + { yyval.element = yyvsp[-1].element; ; + break;} + case 93: +-#line 751 "HTMLParser.y" ++#line 753 "HTMLParser.y" + { yyval.element = yyvsp[-1].element; ; + break;} + case 94: +-#line 752 "HTMLParser.y" ++#line 754 "HTMLParser.y" + { yyval.element = yyvsp[-1].element; ; + break;} + case 95: +-#line 753 "HTMLParser.y" ++#line 755 "HTMLParser.y" + { /* EXTENSION: NS 1.1 / IE 2.0 */ + NoBreak *nb = new NoBreak; + delete yyvsp[-3].tag_attributes; +@@ -2647,75 +2649,75 @@ case 95: + ; + break;} + case 96: +-#line 762 "HTMLParser.y" ++#line 764 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(TT, yyvsp[-1].element_list); ; + break;} + case 97: +-#line 763 "HTMLParser.y" ++#line 765 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = 
new Font(I, yyvsp[-1].element_list); ; + break;} + case 98: +-#line 764 "HTMLParser.y" ++#line 766 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(B, yyvsp[-1].element_list); ; + break;} + case 99: +-#line 765 "HTMLParser.y" ++#line 767 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(U, yyvsp[-1].element_list); ; + break;} + case 100: +-#line 766 "HTMLParser.y" ++#line 768 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(STRIKE, yyvsp[-1].element_list); ; + break;} + case 101: +-#line 767 "HTMLParser.y" ++#line 769 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(BIG, yyvsp[-1].element_list); ; + break;} + case 102: +-#line 768 "HTMLParser.y" ++#line 770 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SMALL, yyvsp[-1].element_list); ; + break;} + case 103: +-#line 769 "HTMLParser.y" ++#line 771 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SUB, yyvsp[-1].element_list); ; + break;} + case 104: +-#line 770 "HTMLParser.y" ++#line 772 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SUP, yyvsp[-1].element_list); ; + break;} + case 105: +-#line 774 "HTMLParser.y" ++#line 776 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(EM, yyvsp[-1].element_list); ; + break;} + case 106: +-#line 775 "HTMLParser.y" ++#line 777 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(STRONG, yyvsp[-1].element_list); ; + break;} + case 107: +-#line 776 "HTMLParser.y" ++#line 778 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(DFN, yyvsp[-1].element_list); ; + break;} + case 108: +-#line 777 "HTMLParser.y" ++#line 779 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(CODE, yyvsp[-1].element_list); ; + break;} + case 109: +-#line 778 "HTMLParser.y" ++#line 780 "HTMLParser.y" + { 
delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(SAMP, yyvsp[-1].element_list); ; + break;} + case 110: +-#line 779 "HTMLParser.y" ++#line 781 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(KBD, yyvsp[-1].element_list); ; + break;} + case 111: +-#line 780 "HTMLParser.y" ++#line 782 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(VAR, yyvsp[-1].element_list); ; + break;} + case 112: +-#line 781 "HTMLParser.y" ++#line 783 "HTMLParser.y" + { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(CITE, yyvsp[-1].element_list); ; + break;} + case 113: +-#line 788 "HTMLParser.y" ++#line 790 "HTMLParser.y" + { + delete yyvsp[-2].tag_attributes; + Anchor *a = new Anchor; +@@ -2725,7 +2727,7 @@ case 113: + ; + break;} + case 114: +-#line 795 "HTMLParser.y" ++#line 797 "HTMLParser.y" + { + Image *i = new Image; + i->attributes.reset(yyvsp[0].tag_attributes); +@@ -2733,7 +2735,7 @@ case 114: + ; + break;} + case 115: +-#line 800 "HTMLParser.y" ++#line 802 "HTMLParser.y" + { + Applet *a = new Applet; + a->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2742,7 +2744,7 @@ case 115: + ; + break;} + case 116: +-#line 808 "HTMLParser.y" ++#line 810 "HTMLParser.y" + { + Font2 *f2 = new Font2; + f2->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2751,7 +2753,7 @@ case 116: + ; + break;} + case 117: +-#line 814 "HTMLParser.y" ++#line 816 "HTMLParser.y" + { + BaseFont *bf = new BaseFont; + bf->attributes.reset(yyvsp[0].tag_attributes); +@@ -2759,7 +2761,7 @@ case 117: + ; + break;} + case 118: +-#line 819 "HTMLParser.y" ++#line 821 "HTMLParser.y" + { + LineBreak *lb = new LineBreak; + lb->attributes.reset(yyvsp[0].tag_attributes); +@@ -2767,7 +2769,7 @@ case 118: + ; + break;} + case 119: +-#line 824 "HTMLParser.y" ++#line 826 "HTMLParser.y" + { + Map *m = new Map; + m->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2776,20 +2778,20 @@ case 119: + ; + break;} + case 120: +-#line 833 "HTMLParser.y" 
++#line 835 "HTMLParser.y" + { + yyval.element_list = 0; + ; + break;} + case 121: +-#line 836 "HTMLParser.y" ++#line 838 "HTMLParser.y" + { + yyval.element_list = yyvsp[-1].element_list ? yyvsp[-1].element_list : new list<auto_ptr<Element> >; + yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element)); + ; + break;} + case 122: +-#line 840 "HTMLParser.y" ++#line 842 "HTMLParser.y" + { + yyval.element_list = yyvsp[-1].element_list ? yyvsp[-1].element_list : new list<auto_ptr<Element> >; + Param *p = new Param; +@@ -2798,26 +2800,26 @@ case 122: + ; + break;} + case 123: +-#line 849 "HTMLParser.y" ++#line 851 "HTMLParser.y" + { + yyval.tag_attributes_list = 0; + ; + break;} + case 124: +-#line 852 "HTMLParser.y" ++#line 854 "HTMLParser.y" + { + yyval.tag_attributes_list = yyvsp[-1].tag_attributes_list; + ; + break;} + case 125: +-#line 855 "HTMLParser.y" ++#line 857 "HTMLParser.y" + { + yyval.tag_attributes_list = yyvsp[-1].tag_attributes_list ? yyvsp[-1].tag_attributes_list : new list<auto_ptr<list<TagAttribute> > >; + yyval.tag_attributes_list->push_back(auto_ptr<list<TagAttribute> >(yyvsp[0].tag_attributes)); + ; + break;} + case 126: +-#line 862 "HTMLParser.y" ++#line 864 "HTMLParser.y" + { + Input *i = new Input; + i->attributes.reset(yyvsp[0].tag_attributes); +@@ -2825,7 +2827,7 @@ case 126: + ; + break;} + case 127: +-#line 867 "HTMLParser.y" ++#line 869 "HTMLParser.y" + { + Select *s = new Select; + s->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2834,7 +2836,7 @@ case 127: + ; + break;} + case 128: +-#line 873 "HTMLParser.y" ++#line 875 "HTMLParser.y" + { + TextArea *ta = new TextArea; + ta->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2843,20 +2845,20 @@ case 128: + ; + break;} + case 129: +-#line 882 "HTMLParser.y" ++#line 884 "HTMLParser.y" + { + yyval.option_list = new list<auto_ptr<Option> >; + yyval.option_list->push_back(auto_ptr<Option>(yyvsp[0].option)); + ; + break;} + case 130: +-#line 886 "HTMLParser.y" ++#line 888 
"HTMLParser.y" + { + (yyval.option_list = yyvsp[-1].option_list)->push_back(auto_ptr<Option>(yyvsp[0].option)); + ; + break;} + case 131: +-#line 892 "HTMLParser.y" ++#line 894 "HTMLParser.y" + { + yyval.option = new Option; + yyval.option->attributes.reset(yyvsp[-2].tag_attributes); +@@ -2864,99 +2866,99 @@ case 131: + ; + break;} + case 132: +-#line 902 "HTMLParser.y" ++#line 904 "HTMLParser.y" + { yyval.heading = new Heading; yyval.heading->level = 1; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; + break;} + case 133: +-#line 903 "HTMLParser.y" ++#line 905 "HTMLParser.y" + { yyval.heading = new Heading; yyval.heading->level = 2; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; + break;} + case 134: +-#line 904 "HTMLParser.y" ++#line 906 "HTMLParser.y" + { yyval.heading = new Heading; yyval.heading->level = 3; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; + break;} + case 135: +-#line 905 "HTMLParser.y" ++#line 907 "HTMLParser.y" + { yyval.heading = new Heading; yyval.heading->level = 4; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; + break;} + case 136: +-#line 906 "HTMLParser.y" ++#line 908 "HTMLParser.y" + { yyval.heading = new Heading; yyval.heading->level = 5; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; + break;} + case 137: +-#line 907 "HTMLParser.y" ++#line 909 "HTMLParser.y" + { yyval.heading = new Heading; yyval.heading->level = 6; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; + break;} + case 138: +-#line 911 "HTMLParser.y" ++#line 913 "HTMLParser.y" + { yyval.inT = 1; ; + break;} + case 139: +-#line 912 "HTMLParser.y" ++#line 914 "HTMLParser.y" + { yyval.inT = 2; ; + break;} + case 140: +-#line 913 "HTMLParser.y" ++#line 915 "HTMLParser.y" + { yyval.inT = 3; ; + break;} + case 141: +-#line 914 "HTMLParser.y" ++#line 916 "HTMLParser.y" + { yyval.inT = 4; ; + break;} + case 142: +-#line 915 "HTMLParser.y" ++#line 917 "HTMLParser.y" + { yyval.inT = 5; ; + break;} + case 
143: +-#line 916 "HTMLParser.y" ++#line 918 "HTMLParser.y" + { yyval.inT = 6; ; + break;} + case 144: +-#line 921 "HTMLParser.y" ++#line 923 "HTMLParser.y" + { yyval.pcdata = 0; ; + break;} + case 145: +-#line 921 "HTMLParser.y" ++#line 923 "HTMLParser.y" + { yyval.pcdata = yyvsp[0].pcdata; ; + break;} + case 146: +-#line 922 "HTMLParser.y" ++#line 924 "HTMLParser.y" + { yyval.caption = 0; ; + break;} + case 147: +-#line 922 "HTMLParser.y" ++#line 924 "HTMLParser.y" + { yyval.caption = yyvsp[0].caption; ; + break;} + case 148: +-#line 923 "HTMLParser.y" ++#line 925 "HTMLParser.y" + { yyval.element_list = 0; ; + break;} + case 149: +-#line 923 "HTMLParser.y" ++#line 925 "HTMLParser.y" + { yyval.element_list = yyvsp[0].element_list; ; + break;} + case 150: +-#line 924 "HTMLParser.y" ++#line 926 "HTMLParser.y" + { yyval.element_list = 0; ; + break;} + case 151: +-#line 924 "HTMLParser.y" ++#line 926 "HTMLParser.y" + { yyval.element_list = yyvsp[0].element_list; ; + break;} + case 152: +-#line 926 "HTMLParser.y" ++#line 928 "HTMLParser.y" + { yyval.tag_attributes = 0; ; + break;} + case 153: +-#line 926 "HTMLParser.y" ++#line 928 "HTMLParser.y" + { yyval.tag_attributes = yyvsp[0].tag_attributes; ; + break;} + case 154: +-#line 927 "HTMLParser.y" ++#line 929 "HTMLParser.y" + { yyval.tag_attributes = 0; ; + break;} + case 155: +-#line 927 "HTMLParser.y" ++#line 929 "HTMLParser.y" + { yyval.tag_attributes = yyvsp[0].tag_attributes; ; + break;} + } +@@ -3158,7 +3160,7 @@ yyerrhandle: + /* END */ + + /* #line 891 "/usr/local/lib/bison.cc" */ +-#line 965 "HTMLParser.y" ++#line 967 "HTMLParser.y" + /* } */ + + /* diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-HTMLParser_h ./patches/patch-HTMLParser_h --- /usr/ports/converters/html2text/patches/patch-HTMLParser_h Thu Jan 1 01:00:00 1970 +++ ./patches/patch-HTMLParser_h Sat Jan 21 20:58:10 2017 @@ -0,0 +1,12 @@ +$OpenBSD$ +--- HTMLParser.h.orig Mon Aug 12 19:19:43 2002 ++++ HTMLParser.h Sat Jan 21 20:57:57 2017 
+@@ -487,7 +487,7 @@ static const int END_VAR; + /* decl const */ + public: + int YY_HTMLParser_PARSE(YY_HTMLParser_PARSE_PARAM); +- virtual void YY_HTMLParser_ERROR(char *) YY_HTMLParser_ERROR_BODY; ++ virtual void YY_HTMLParser_ERROR(const char *) YY_HTMLParser_ERROR_BODY; + #ifdef YY_HTMLParser_PURE + #ifdef YY_HTMLParser_LSP_NEEDED + virtual int YY_HTMLParser_LEX(YY_HTMLParser_STYPE *YY_HTMLParser_LVAL,YY_HTMLParser_LTYPE *YY_HTMLParser_LLOC) YY_HTMLParser_LEX_BODY; diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-HTMLParser_y ./patches/patch-HTMLParser_y --- /usr/ports/converters/html2text/patches/patch-HTMLParser_y Thu Jan 1 01:00:00 1970 +++ ./patches/patch-HTMLParser_y Sat Jan 21 20:58:10 2017 @@ -0,0 +1,14 @@ +$OpenBSD$ +--- HTMLParser.y.orig Sun Nov 23 12:05:29 2003 ++++ HTMLParser.y Sat Jan 21 20:57:57 2017 +@@ -337,7 +337,9 @@ document_: + ($$ = $1)->head.base_attributes.reset($2); + } + | document_ META { +- ($$ = $1)->head.meta_attributes.reset($2); ++ auto_ptr<Meta> s(new Meta); ++ s->attributes.reset($2); ++ ($$ = $1)->head.metas.push_back(s); + } + | document_ LINK { + ($$ = $1)->head.link_attributes.reset($2); diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-Makefile_in ./patches/patch-Makefile_in --- /usr/ports/converters/html2text/patches/patch-Makefile_in Mon Sep 17 21:21:21 2007 +++ ./patches/patch-Makefile_in Sat Jan 21 21:27:57 2017 @@ -1,6 +1,6 @@ -$OpenBSD: patch-Makefile_in,v 1.1.1.1 2007/09/17 19:21:21 jasper Exp $ +$OpenBSD$ --- Makefile.in.orig Wed Jan 14 14:47:02 2004 -+++ Makefile.in Thu Sep 13 21:36:17 2007 ++++ Makefile.in Sat Jan 21 21:27:53 2017 @@ -29,9 +29,9 @@ BISONXX = bison++ YFLAGS = @@ -23,3 +23,12 @@ LDFLAGS = $(DEBUG) LOADLIBES = $(LIBSTDCXX_LIBS) $(SOCKET_LIBRARIES) +@@ -68,7 +68,7 @@ all : html2text + @echo '"/usr/local/bin", "/usr/local/man/man1" and "/usr/local/man/man5").'; + @echo + +-OBJS = html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o sgml.o table.o urlistream.o 
Properties.o cmp_nocase.o ++OBJS = html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o sgml.o table.o Properties.o cmp_nocase.o + + html2text : $(OBJS) $(LIBSTDCXX_LIBS) + $(CXX) $(LDFLAGS) $(OBJS) $(LOADLIBES) $(LDLIBS) -o $@ diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-configure ./patches/patch-configure --- /usr/ports/converters/html2text/patches/patch-configure Thu Jan 1 01:00:00 1970 +++ ./patches/patch-configure Sat Jan 21 21:17:41 2017 @@ -0,0 +1,35 @@ +$OpenBSD$ +--- configure.orig Mon Jan 12 16:47:18 2004 ++++ configure Sat Jan 21 21:17:36 2017 +@@ -38,8 +38,7 @@ int main(int, char **) { + return 0; + } + EOF +-CXX=unknown; +-for i in "CC" "g++" "cc" "$CC"; do ++for i in "$CXX" "c++" "g++" "cc" "$CC"; do + if $i -c $tmp_file.C 2>/dev/null; then + CXX="$i"; + break; +@@ -205,12 +204,19 @@ cat <<EOF >$tmp_file.C; + #include <new> + #include <vector> + using namespace std; +-void func() { map<string, string> x; } ++int main(void) { ++ map<string, string> x; ++ return 0; ++} + EOF +-if $CXX -c $tmp_file.C 2>/dev/null; then ++if $CXX $tmp_file.C 2>/dev/null; then + LIBSTDCXX_INCLUDES=""; + LIBSTDCXX_LIBS=""; + $echo 'works; no need to make "./libstd"'; ++elif $CXX $tmp_file.C -lstdc++ 2>/dev/null; then ++ LIBSTDCXX_INCLUDES=""; ++ LIBSTDCXX_LIBS="-lstdc++"; ++ $echo 'works with libstdc++; no need to make "./libstd"'; + else + LIBSTDCXX_INCLUDES='-Ilibstd/include'; + LIBSTDCXX_LIBS='libstd/libstd.a'; diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-format_C ./patches/patch-format_C --- /usr/ports/converters/html2text/patches/patch-format_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-format_C Sat Jan 21 20:58:10 2017 @@ -0,0 +1,113 @@ +$OpenBSD$ +--- format.C.orig Sun Nov 23 12:05:29 2003 ++++ format.C Sat Jan 21 20:57:57 2017 +@@ -56,6 +56,8 @@ using std::flush; + #define nelems(array) (sizeof(array) / sizeof((array)[0])) + #endif + ++extern int use_encoding; ++ + /* 
------------------------------------------------------------------------- */ + + static Line *line_format(const list<auto_ptr<Element> > *elements); +@@ -560,7 +562,7 @@ Heading::format(Area::size_type w, int halign) const + "LEFT", Area::LEFT, + "CENTER", Area::CENTER, + "RIGHT", Area::RIGHT, +- 0 ++ NULL + ); + + static char cell_attributes[7]; +@@ -682,7 +684,7 @@ Paragraph::format(Area::size_type w, int halign) const + "LEFT", Area::LEFT, + "CENTER", Area::CENTER, + "RIGHT", Area::RIGHT, +- 0 ++ NULL + ); + + static BlockFormat bf("P"); +@@ -752,7 +754,7 @@ Applet::format(Area::size_type w, int /*halign*/ ) con + "LEFT", Area::LEFT, + "MIDDLE", Area::CENTER, + "RIGHT", Area::RIGHT, +- 0 ++ NULL + ); + Area *a = ::format(content.get(), w, halign); + if (a) return a; +@@ -802,7 +804,7 @@ Division::format(Area::size_type w, int halign) const + "LEFT", Area::LEFT, + "CENTER", Area::CENTER, + "RIGHT", Area::RIGHT, +- 0 ++ NULL + )); + } + +@@ -882,7 +884,7 @@ Input::line_format() const + res = '[' + string(size, '*') + ']'; + } else + if (cmp_nocase(type, "CHECKBOX") == 0) { +- res = checked ? '*' : LATIN1_ordm; // "ordm" looks like a superscript zero. ++ res = checked ? string("*") : (USE_UTF8 ? string("\u2070") : string(1, LATIN1_ordm)); // "ordm" looks like a superscript zero. + } else + if (cmp_nocase(type, "RADIO") == 0) { + res = checked ? '#' : 'o'; +@@ -1168,10 +1170,13 @@ NoBreak::line_format() const + Line *l(::line_format(content.get())); + if (!l) return 0; + ++ // don't insert ISO-8859-1 non-breaking spaces, it breaks UTF-8 processing ++ /* + for (Line::size_type i = 0; i < l->length(); ++i) { + Cell &c((*l)[i]); + if (c.character == ' ') c.character = LATIN1_nbsp; + } ++ */ + return l; + } + +@@ -1210,6 +1215,7 @@ make_up(const Line &line, Area::size_type w, int halig + } + + Line::size_type to = from + 1; ++ int to_from; + + Line::size_type lbp = (Line::size_type) -1; // "Last break position". 
+ +@@ -1238,18 +1244,20 @@ make_up(const Line &line, Area::size_type w, int halig + to++; + } + +- if (to - from > w && lbp != (Area::size_type) -1) { to = lbp; break; } ++ if (line.utf_length(from,to) > w && lbp != (Area::size_type) -1) ++ { to = lbp; break; } + } + ++ to_from = line.utf_length(from,to); + /* + * Copy the "from...to" range from the "line" to the bottom of the "res" + * Area. + */ + Area::size_type x = 0; + Area::size_type len = to - from; +- if (halign == Area::LEFT || len >= w) { ; } else +- if (halign == Area::CENTER) { x += (w - len) / 2; } else +- if (halign == Area::RIGHT) { x += w - len; } ++ if (halign == Area::LEFT || to_from >= w) { ; } else ++ if (halign == Area::CENTER) { x += (w - to_from) / 2; } else ++ if (halign == Area::RIGHT) { x += w - to_from; } + res->insert(line.cells() + from, len, x, res->height()); + + /* +@@ -1632,7 +1640,7 @@ ListFormat::get_type( + "A", UPPER_ALPHA, + "i", LOWER_ROMAN, + "I", UPPER_ROMAN, +- 0 ++ NULL + ); + } + diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-html2text_C ./patches/patch-html2text_C --- /usr/ports/converters/html2text/patches/patch-html2text_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-html2text_C Sat Jan 21 20:58:10 2017 @@ -0,0 +1,400 @@ +$OpenBSD$ +--- html2text.C.orig Sun Nov 23 12:05:29 2003 ++++ html2text.C Sat Jan 21 20:57:57 2017 +@@ -36,26 +36,41 @@ + + + #include <iostream> ++#include <fstream> ++#include <sstream> ++#include <algorithm> ++#include <iterator> + #include <string.h> + #include <stdlib.h> + ++#include <iconv.h> ++#include <errno.h> ++#include <unistd.h> ++#include <langinfo.h> ++ + #include "html.h" + #include "HTMLControl.h" +-#include "urlistream.h" ++//#include "urlistream.h" + #include "format.h" + + #define stringify(x) stringify2(x) + #define stringify2(x) #x + + /* ------------------------------------------------------------------------- */ ++using std::ifstream; ++using std::stringstream; ++using std::istream_iterator; ++using 
std::ostream_iterator; ++using std::noskipws; + + class MyParser : public HTMLControl { + + public: + enum { PRINT_AS_ASCII, UNPARSE, SYNTAX_CHECK }; ++ string meta_encoding; + + MyParser( +- urlistream &is_, ++ istream &is_, + bool debug_scanner_, + bool debug_parser_, + ostream &os_, +@@ -71,7 +86,7 @@ class MyParser : public HTMLControl { (public) + {} + + private: +- /*virtual*/ void yyerror(char *); ++ /*virtual*/ void yyerror(const char *); + /*virtual*/ void process(const Document &); + + ostream &os; +@@ -81,7 +96,7 @@ class MyParser : public HTMLControl { (public) + }; + + /*virtual*/ void +-MyParser::yyerror(char *p) ++MyParser::yyerror(const char *p) + { + + /* +@@ -104,6 +119,23 @@ MyParser::yyerror(char *p) + /*virtual*/ void + MyParser::process(const Document &document) + { ++ list<auto_ptr<Meta> >::const_iterator i; ++ for(i = document.head.metas.begin(); i != document.head.metas.end(); ++i) { ++ bool exists = false; ++ get_attribute(i->get()->attributes.get(), "http-equiv", &exists); ++ if (exists) { ++ string content = get_attribute(i->get()->attributes.get(), "content", ""); ++ char to_find[] = "charset="; ++ string::size_type found_pos = content.find(to_find); ++ if (found_pos != string::npos) ++ { ++ this->meta_encoding = content.substr(found_pos + sizeof(to_find) - 1); ++ //std::cerr << this->meta_encoding << std::endl; ++ } ++ break; ++ } ++ } ++ + switch (mode) { + + case PRINT_AS_ASCII: +@@ -124,6 +156,72 @@ MyParser::process(const Document &document) + } + } + ++bool recode(stringstream& stream, const char* to_encoding, const char* from_encoding) ++{ ++ iconv_t iconv_handle = iconv_open(to_encoding, from_encoding); ++ if (iconv_handle != iconv_t(-1)) ++ { ++ stream.seekg(0); ++ string input_string = stream.str(); ++ size_t input_size = input_string.size(); ++ char* raw_input = new char[input_size+1]; ++ char* const orig_raw_input = raw_input; ++ strcpy(raw_input, input_string.data()); ++ size_t max_output_size = input_size * 4; // maximum 
possible overhead ++ char* raw_output = new char[max_output_size+1]; ++ char* const orig_raw_output = raw_output; ++ size_t iconv_value = ++ iconv(iconv_handle, &raw_input, &input_size, &raw_output, &max_output_size); ++ ++ if (iconv_value != (size_t)-1) ++ { ++ *raw_output = '\0'; ++ stream.str(string(orig_raw_output)); ++ /* debug */ ++ //std::copy(istream_iterator<char>(input_stream), istream_iterator<char>(), ostream_iterator<char>(std::cerr)); ++ } ++ else ++ { ++ std::cerr << "Input recoding failed due to "; ++ if (errno == EILSEQ) ++ { ++ std::cerr << "invalid input sequence. Unconverted part of text follows." << std::endl; ++ std::cerr << raw_input; ++ } ++ else ++ { ++ std::cerr << "unknown reason."; ++ } ++ std::cerr << std::endl; ++ } ++ ++ delete [] orig_raw_input; ++ delete [] orig_raw_output; ++ iconv_close(iconv_handle); ++ ++ if (iconv_value == (size_t)-1) ++ { ++ return false; ++ } ++ } ++ else ++ { ++ if (errno == EINVAL) ++ { ++ std::cerr << "Recoding from '" << from_encoding ++ << "' to '" << to_encoding << "' is not available." << std::endl; ++ std::cerr << "Check that '" << from_encoding ++ << "' is a valid encoding." << std::endl; ++ } ++ else ++ { ++ std::cerr << "Error: cannot setup recoding." 
<< std::endl; ++ } ++ return false; ++ } ++ return true; ++} ++ + /* ------------------------------------------------------------------------- */ + + static const char *usage = "\ +@@ -132,7 +230,7 @@ Usage:\n\ + html2text -version\n\ + html2text [ -unparse | -check ] [ -debug-scanner ] [ -debug-parser ] \\\n\ + [ -rcfile <file> ] [ -style ( compact | pretty ) ] [ -width <w> ] \\\n\ +- [ -o <file> ] [ -nobs ] [ -ascii ] [ <input-url> ] ...\n\ ++ [ -o <file> ] [ -nobs ] [ -ascii | -utf8 ] [ <input-url> ] ...\n\ + Formats HTML document(s) read from <input-url> or STDIN and generates ASCII\n\ + text.\n\ + -help Print this text and exit\n\ +@@ -148,9 +246,11 @@ text.\n\ + -o <file> Redirect output into <file>\n\ + -nobs Do not use backspaces for boldface and underlining\n\ + -ascii Use plain ASCII for output instead of ISO-8859-1\n\ ++ -utf8 Assume both terminal and input stream are in UTF-8 mode\n\ ++ -nometa Don't try to recode input using 'meta' tag\n\ + "; + +-int use_iso8859 = 1; ++int use_encoding = ISO8859; + + int + main(int argc, char **argv) +@@ -184,22 +284,25 @@ main(int argc, char **argv) + const char *style = "compact"; + int width = 79; + const char *output_file_name = "-"; +- bool use_backspaces = true; ++ bool use_backspaces = false; ++ bool use_meta = true; + + int i; + for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) { + const char *arg = argv[i]; + +- if (!strcmp(arg, "-unparse" )) { mode = MyParser::UNPARSE; } else +- if (!strcmp(arg, "-check" )) { mode = MyParser::SYNTAX_CHECK; } else +- if (!strcmp(arg, "-debug-scanner")) { debug_scanner = true; } else +- if (!strcmp(arg, "-debug-parser" )) { debug_parser = true; } else +- if (!strcmp(arg, "-rcfile" )) { rcfile = argv[++i]; } else +- if (!strcmp(arg, "-style" )) { style = argv[++i]; } else +- if (!strcmp(arg, "-width" )) { width = atoi(argv[++i]); } else +- if (!strcmp(arg, "-o" )) { output_file_name = argv[++i]; } else +- if (!strcmp(arg, "-nobs" )) { use_backspaces = false; } else 
+- if (!strcmp(arg, "-ascii" )) { use_iso8859 = false; } else ++ if (!strcmp(arg, "-unparse" )) { mode = MyParser::UNPARSE; } else ++ if (!strcmp(arg, "-check" )) { mode = MyParser::SYNTAX_CHECK; } else ++ if (!strcmp(arg, "-debug-scanner")) { debug_scanner = true; } else ++ if (!strcmp(arg, "-debug-parser" )) { debug_parser = true; } else ++ if (!strcmp(arg, "-rcfile" )) { rcfile = argv[++i]; } else ++ if (!strcmp(arg, "-style" )) { style = argv[++i]; } else ++ if (!strcmp(arg, "-width" )) { if (atoi(argv[++i]) > 0) width = atoi(argv[i]); } else ++ if (!strcmp(arg, "-o" )) { output_file_name = argv[++i]; } else ++ if (!strcmp(arg, "-nobs" )) { use_backspaces = false; } else ++ if (!strcmp(arg, "-ascii" )) { use_encoding = ASCII; } else ++ if (!strcmp(arg, "-utf8" )) { use_encoding = UTF8; } else ++ if (!strcmp(arg, "-nometa" )) { use_meta = false; } else + { + std::cerr + << "Unrecognized command line option \"" +@@ -329,8 +432,13 @@ main(int argc, char **argv) + ostream *osp; + std::ofstream ofs; + ++ bool output_is_tty = false; + if (!strcmp(output_file_name, "-")) { + osp = &std::cout; ++ if (isatty(1 /* stdout */)) ++ { ++ output_is_tty = true; ++ } + } else { + ofs.open(output_file_name, std::ios::out); + if (!ofs) { +@@ -352,30 +460,145 @@ main(int argc, char **argv) + } + + istream *isp; +- urlistream uis; ++ istream *uis; ++ ifstream* infile = NULL; ++ stringstream input_stream; + +- uis.open(input_url); +- if (!uis.is_open()) { +- std::cerr +- << "Opening input URL \"" +- << input_url +- << "\": " +- << uis.open_error() +- << std::endl; +- exit(1); ++ if (strcmp(input_url, "-") == 0) ++ { ++ uis = &std::cin; ++ } ++ else ++ { ++ infile = new ifstream(input_url); ++ if (!infile->is_open()) ++ { ++ delete infile; ++ std::cerr ++ << "Cannot open input file \"" ++ << input_url ++ << "\"." 
++ << std::endl; ++ exit(1); ++ } ++ uis = infile; + } + +- MyParser parser( +- uis, +- debug_scanner, +- debug_parser, +- *osp, +- mode, +- width, +- input_url +- ); ++ *uis >> noskipws; ++ std::copy(istream_iterator<char>(*uis), istream_iterator<char>(), ostream_iterator<char>(input_stream)); + ++ if (infile) ++ { ++ infile->close(); ++ delete infile; ++ } ++ ++ string from_encoding; ++ if (use_meta) ++ { ++ std::ofstream fake_osp("/dev/null"); ++ // fake parsing to determine meta ++ MyParser parser( ++ input_stream, ++ debug_scanner, ++ debug_parser, ++ fake_osp, ++ mode, ++ width, ++ input_url ++ ); ++ if (parser.yyparse() != 0) exit(1); ++ ++ from_encoding = parser.meta_encoding; ++ ++ // don't need to debug twice ... ++ debug_scanner = false; ++ debug_parser = false; ++ ++ /* ++ * It will be good to show warning in this case. But there are too many ++ * html documents without encoding info, so this branch is commented by ++ * now. ++ if (parser.meta_encoding.empty()) ++ { ++ std::cerr << "Warning: cannot determine encoding from html file." << std::endl; ++ std::cerr << "To remove this warning, use '-nometa' option with, optionally, '-utf8' or '-ascii' options" << std::endl; ++ std::cerr << "to process file \"" << input_url << "\"." 
<< std::endl; ++ } ++ */ ++ } ++ if (from_encoding.empty()) // -nometa supplied or no appropriate tag ++ { ++ if (use_encoding == UTF8) ++ { ++ from_encoding = "UTF-8"; ++ } ++ else if (use_encoding == ASCII) ++ { ++ // in ASCII mode we don't need recoding at all ++ from_encoding = ""; ++ } ++ else ++ { ++ from_encoding = "ISO_8859-1"; ++ } ++ } ++ ++ bool result = true; ++ if (!from_encoding.empty()) ++ { ++ // recode input ++ result = recode(input_stream, "UTF-8", from_encoding.data()); ++ } ++ if (!result) ++ { ++ continue; ++ } ++ ++ if (number_of_input_urls != 1) { ++ *osp << "###### " << input_url << " ######" << std::endl; ++ } ++ ++ // real parsing now always processes UTF-8 (except for ASCII mode) ++ if (use_encoding != ASCII) ++ { ++ use_encoding = UTF8; ++ } ++ ++ stringstream output_stream; ++ ++ // real parsing ++ input_stream.clear(); ++ input_stream.seekg(0); ++ MyParser parser( ++ input_stream, ++ debug_scanner, ++ debug_parser, ++ output_stream, ++ mode, ++ width, ++ input_url ++ ); + if (parser.yyparse() != 0) exit(1); ++ ++ // recode output if output is terminal ++ if (output_is_tty) ++ { ++ setlocale(LC_CTYPE,""); ++ char output_encoding[64]; ++ strcpy(output_encoding, nl_langinfo(CODESET)); ++ strcat(output_encoding, "//translit"); ++ ++ result = recode(output_stream, output_encoding, "UTF-8"); ++ if (!result) ++ { ++ continue; ++ } ++ } ++ output_stream.clear(); ++ output_stream.seekg(0); ++ output_stream >> noskipws; ++ std::copy(istream_iterator<char>(output_stream), istream_iterator<char>(), ostream_iterator<char>(*osp)); + } + + return 0; diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-html_C ./patches/patch-html_C --- /usr/ports/converters/html2text/patches/patch-html_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-html_C Sat Jan 21 20:58:10 2017 @@ -0,0 +1,28 @@ +$OpenBSD$ +--- html.C.orig Sun Nov 23 12:05:29 2003 ++++ html.C Sat Jan 21 20:57:57 2017 +@@ -68,6 +68,7 @@ static pack(Option) + static pack(DefinitionListItem) + 
static pack(Script) + static pack(Style) ++static pack(Meta) + + #undef pack + +@@ -131,9 +132,15 @@ Head::unparse(ostream &os, ostream_manipulator separat + if (base_attributes.get()) os << "<BASE" << base_attributes << ">" << std::endl; + foreach(scripts, os, separator); + foreach(styles, os, separator); +- if (meta_attributes.get()) os << "<META" << meta_attributes << ">" << std::endl; ++ foreach(metas, os, separator); + if (link_attributes.get()) os << "<LINK" << link_attributes << ">" << std::endl; + os << "</HEAD>" << separator; ++} ++ ++void ++Meta::unparse(ostream &os, ostream_manipulator separator) const ++{ ++ os << "<META" << attributes << ">" << separator; + } + + void diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-html_h ./patches/patch-html_h --- /usr/ports/converters/html2text/patches/patch-html_h Thu Jan 1 01:00:00 1970 +++ ./patches/patch-html_h Sat Jan 21 20:58:10 2017 @@ -0,0 +1,36 @@ +$OpenBSD$ +--- html.h.orig Thu Oct 4 22:03:54 2001 ++++ html.h Sat Jan 21 20:57:57 2017 +@@ -61,6 +61,11 @@ + + /* ------------------------------------------------------------------------- */ + ++enum {ASCII, ISO8859, UTF8}; ++#define USE_ISO8859 (use_encoding == ISO8859) ++#define USE_ASCII (use_encoding == ASCII) ++#define USE_UTF8 (use_encoding == UTF8) ++ + #define LATIN1_nbsp 160 + #define LATIN1_iexcl 161 + #define LATIN1_cent 162 +@@ -431,13 +436,19 @@ struct Style { + void unparse(ostream &, ostream_manipulator separator) const; + }; + ++struct Meta { ++ auto_ptr<list<TagAttribute> > attributes; // HTTP-EQUIV NAME CONTENT ++ ++ void unparse(ostream &, ostream_manipulator separator) const; ++}; ++ + struct Head { + auto_ptr<PCData> title; + auto_ptr<list<TagAttribute> > isindex_attributes; // PROMPT + auto_ptr<list<TagAttribute> > base_attributes; // HREF + list<auto_ptr<Script> > scripts; + list<auto_ptr<Style> > styles; +- auto_ptr<list<TagAttribute> > meta_attributes; // HTTP-EQUIV NAME CONTENT ++ list<auto_ptr<Meta> > metas; + 
auto_ptr<list<TagAttribute> > link_attributes; // HREF REL REV TITLE + + void unparse(ostream &, ostream_manipulator separator) const; diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-sgml_C ./patches/patch-sgml_C --- /usr/ports/converters/html2text/patches/patch-sgml_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-sgml_C Sat Jan 21 20:58:10 2017 @@ -0,0 +1,573 @@ +$OpenBSD$ +--- sgml.C.orig Sun Nov 23 12:09:11 2003 ++++ sgml.C Sat Jan 21 20:57:57 2017 +@@ -61,262 +61,281 @@ + static const struct TextToInt { + char name[8]; + int iso8859code; +- char *asciistr; ++ const char *asciistr; ++ unsigned long unicode; + } entities[] = { +- { "AElig", LATIN1_AElig, "AE" }, +- { "AMP", 0, "&" }, +- { "Aacute", LATIN1_Aacute, "A'" }, +- { "Acirc", LATIN1_Acirc, "A^" }, +- { "Agrave", LATIN1_Agrave, "A`" }, +- { "Alpha", 0, "A" }, +- { "Aring", LATIN1_Aring, "AA" }, +- { "Atilde", LATIN1_Atilde, "A~" }, +- { "Auml", LATIN1_Auml, "A\"" }, +- { "Beta", 0, "B" }, +- { "Ccedil", LATIN1_Ccedil, "C," }, +- { "Chi", 0, "H" }, +- { "Dagger", 0, "++" }, +- { "Delta", 0, "D" }, +- { "ETH", LATIN1_ETH, "D-" }, +- { "Eacute", LATIN1_Eacute, "E'" }, +- { "Ecirc", LATIN1_Ecirc, "E^" }, +- { "Egrave", LATIN1_Egrave, "E`" }, +- { "Epsilon", 0, "E" }, +- { "Eta", 0, "E" }, +- { "Euml", LATIN1_Euml, "E\"" }, +- { "GT", 0, ">" }, +- { "Gamma", 0, "G" }, +- { "Iacute", LATIN1_Iacute, "I'" }, +- { "Icirc", LATIN1_Icirc, "I^" }, +- { "Igrave", LATIN1_Igrave, "I`" }, +- { "Iota", 0, "I" }, +- { "Iuml", LATIN1_Iuml, "I\"" }, +- { "Kappa", 0, "K" }, +- { "LT", 0, "<" }, +- { "Lambda", 0, "L" }, +- { "Mu", 0, "M" }, +- { "Ntilde", LATIN1_Ntilde, "N~" }, +- { "Nu", 0, "N" }, +- { "OElig", 0, "OE" }, +- { "Oacute", LATIN1_Oacute, "O'" }, +- { "Ocirc", LATIN1_Ocirc, "O^" }, +- { "Ograve", LATIN1_Ograve, "O`" }, +- { "Omega", 0, "O" }, +- { "Omicron", 0, "O" }, +- { "Oslash", LATIN1_Oslash, "O/" }, +- { "Otilde", LATIN1_Otilde, "O~" }, +- { "Ouml", LATIN1_Ouml, "O\"" }, +- { "Phi", 0, "F" }, 
+- { "Pi", 0, "P" }, +- { "Prime", 0, "''" }, +- { "Psi", 0, "PS" }, +- { "QUOT", 0, "\"" }, +- { "Rho", 0, "R" }, +- { "Scaron", 0, "S" }, +- { "Sigma", 0, "S" }, +- { "THORN", LATIN1_THORN, "TH" }, +- { "Tau", 0, "T" }, +- { "Theta", 0, "TH" }, +- { "Uacute", LATIN1_Uacute, "U'" }, +- { "Ucirc", LATIN1_Ucirc, "U^" }, +- { "Ugrave", LATIN1_Ugrave, "U`" }, +- { "Upsilon", 0, "U" }, +- { "Uuml", LATIN1_Uuml, "U\"" }, +- { "Xi", 0, "X" }, +- { "Yacute", LATIN1_Yacute, "Y'" }, +- { "Yuml", 0, "Y\"" }, +- { "Zeta", 0, "Z" }, +- { "aacute", LATIN1_aacute, "a'" }, +- { "acirc", LATIN1_acirc, "a^" }, +- { "acute", LATIN1_acute, "'" }, +- { "aelig", LATIN1_aelig, "ae" }, +- { "agrave", LATIN1_agrave, "a`" }, +- { "alefsym", 0, "Aleph" }, +- { "alpha", 0, "a" }, +- { "amp", 0, "&" }, +- { "and", 0, "AND" }, +- { "ang", 0, "-V" }, +- { "apos", 0, "'" }, +- { "aring", LATIN1_aring, "aa" }, +- { "asymp", 0, "~=" }, +- { "atilde", LATIN1_atilde, "a~" }, +- { "auml", LATIN1_auml, "a\"" }, +- { "bdquo", 0, "\"" }, +- { "beta", 0, "b" }, +- { "brvbar", LATIN1_brvbar, "|" }, +- { "bull", 0, " o " }, +- { "cap", 0, "(U" }, +- { "ccedil", LATIN1_ccedil, "c," }, +- { "cedil", LATIN1_cedil, "," }, +- { "cent", LATIN1_cent, "-c-" }, +- { "chi", 0, "h" }, +- { "circ", 0, "^" }, ++ { "AElig", LATIN1_AElig, "AE", 0x00c6}, ++ { "AMP", 0, "&", 0x0026}, ++ { "Aacute", LATIN1_Aacute, "A'", 0x00c1}, ++ { "Acirc", LATIN1_Acirc, "A^", 0x00c2}, ++ { "Agrave", LATIN1_Agrave, "A`", 0x00c0}, ++ { "Alpha", 0, "A", 0x0391}, ++ { "Aring", LATIN1_Aring, "AA", 0x00c5}, ++ { "Atilde", LATIN1_Atilde, "A~", 0x00c3}, ++ { "Auml", LATIN1_Auml, "A\"", 0x00c4}, ++ { "Beta", 0, "B", 0x0392}, ++ { "Ccedil", LATIN1_Ccedil, "C,", 0x00c7}, ++ { "Chi", 0, "H", 0x03a7}, ++ { "Dagger", 0, "++", 0x2021}, ++ { "Delta", 0, "D", 0x0394}, ++ { "ETH", LATIN1_ETH, "D-", 0x00d0}, ++ { "Eacute", LATIN1_Eacute, "E'", 0x00c9}, ++ { "Ecirc", LATIN1_Ecirc, "E^", 0x00ca}, ++ { "Egrave", LATIN1_Egrave, "E`", 0x00c8}, ++ { "Epsilon", 
0, "E", 0x0395}, ++ { "Eta", 0, "E", 0x0397}, ++ { "Euml", LATIN1_Euml, "E\"", 0x00cb}, ++ { "GT", 0, ">", 0x003e}, ++ { "Gamma", 0, "G", 0x0393}, ++ { "Iacute", LATIN1_Iacute, "I'", 0x00cd}, ++ { "Icirc", LATIN1_Icirc, "I^", 0x00ce}, ++ { "Igrave", LATIN1_Igrave, "I`", 0x00cc}, ++ { "Iota", 0, "I", 0x0399}, ++ { "Iuml", LATIN1_Iuml, "I\"", 0x00cf}, ++ { "Kappa", 0, "K", 0x039a}, ++ { "LT", 0, "<", 0x003c}, ++ { "Lambda", 0, "L", 0x039b}, ++ { "Mu", 0, "M", 0x039c}, ++ { "Ntilde", LATIN1_Ntilde, "N~", 0x00d1}, ++ { "Nu", 0, "N", 0x039d}, ++ { "OElig", 0, "OE", 0x0152}, ++ { "Oacute", LATIN1_Oacute, "O'", 0x00d3}, ++ { "Ocirc", LATIN1_Ocirc, "O^", 0x00d4}, ++ { "Ograve", LATIN1_Ograve, "O`", 0x00d2}, ++ { "Omega", 0, "O", 0x03a9}, ++ { "Omicron", 0, "O", 0x039f}, ++ { "Oslash", LATIN1_Oslash, "O/", 0x00d8}, ++ { "Otilde", LATIN1_Otilde, "O~", 0x00d5}, ++ { "Ouml", LATIN1_Ouml, "O\"", 0x00d6}, ++ { "Phi", 0, "F", 0x03a6}, ++ { "Pi", 0, "P", 0x03a0}, ++ { "Prime", 0, "''", 0x2033}, ++ { "Psi", 0, "PS", 0x03a8}, ++ { "QUOT", 0, "\"", 0x0022}, ++ { "Rho", 0, "R", 0x03a1}, ++ { "Scaron", 0, "S", 0x0160}, ++ { "Sigma", 0, "S", 0x03a3}, ++ { "THORN", LATIN1_THORN, "TH", 0x00de}, ++ { "Tau", 0, "T", 0x03a4}, ++ { "Theta", 0, "TH", 0x0398}, ++ { "Uacute", LATIN1_Uacute, "U'", 0x00da}, ++ { "Ucirc", LATIN1_Ucirc, "U^", 0x00db}, ++ { "Ugrave", LATIN1_Ugrave, "U`", 0x00d9}, ++ { "Upsilon", 0, "U", 0x03a5}, ++ { "Uuml", LATIN1_Uuml, "U\"", 0x00dc}, ++ { "Xi", 0, "X", 0x039e}, ++ { "Yacute", LATIN1_Yacute, "Y'", 0x00dd}, ++ { "Yuml", 0, "Y\"", 0x0178}, ++ { "Zeta", 0, "Z", 0x0396}, ++ { "aacute", LATIN1_aacute, "a'", 0x00e1}, ++ { "acirc", LATIN1_acirc, "a^", 0x00e2}, ++ { "acute", LATIN1_acute, "'", 0x00b4}, ++ { "aelig", LATIN1_aelig, "ae", 0x00e6}, ++ { "agrave", LATIN1_agrave, "a`", 0x00e0}, ++ { "alefsym", 0, "Aleph",0x2135}, ++ { "alpha", 0, "a", 0x03b1}, ++ { "amp", 0, "&", 0x0026}, ++ { "and", 0, "AND", 0x2227}, ++ { "ang", 0, "-V", 0x2220}, ++ { "apos", 0, "'", 0x0027}, 
++ { "aring", LATIN1_aring, "aa", 0x00e5}, ++ { "asymp", 0, "~=", 0x2248}, ++ { "atilde", LATIN1_atilde, "a~", 0x00e3}, ++ { "auml", LATIN1_auml, "a\"", 0x00e4}, ++ { "bdquo", 0, "\"", 0x201e}, ++ { "beta", 0, "b", 0x03b2}, ++ { "brvbar", LATIN1_brvbar, "|", 0x00a6}, ++ { "bull", 0, " o ", 0x2022}, ++ { "cap", 0, "(U", 0x2229}, ++ { "ccedil", LATIN1_ccedil, "c,", 0x00e7}, ++ { "cedil", LATIN1_cedil, ",", 0x00b8}, ++ { "cent", LATIN1_cent, "-c-", 0x00a2}, ++ { "chi", 0, "h", 0x03c7}, ++ { "circ", 0, "^", 0x02c6}, + // { "clubs", 0, "[clubs]" }, +- { "cong", 0, "?=" }, +- { "copy", LATIN1_copy, "(c)" }, +- { "crarr", 0, "<-'" }, +- { "cup", 0, ")U" }, +- { "curren", LATIN1_curren, "CUR" }, +- { "dArr", 0, "vv" }, +- { "dagger", 0, "+" }, +- { "darr", 0, "v" }, +- { "deg", LATIN1_deg, "DEG" }, +- { "delta", 0, "d" }, ++ { "cong", 0, "?=", 0x2245}, ++ { "copy", LATIN1_copy, "(c)", 0x00a9}, ++ { "crarr", 0, "<-'", 0x21b5}, ++ { "cup", 0, ")U", 0x222a}, ++ { "curren", LATIN1_curren, "CUR", 0x00a4}, ++ { "dArr", 0, "vv", 0x21d3}, ++ { "dagger", 0, "+", 0x2020}, ++ { "darr", 0, "v", 0x2193}, ++ { "deg", LATIN1_deg, "DEG", 0x00b0}, ++ { "delta", 0, "d", 0x03b4}, + // { "diams", 0, "[diamonds]" }, +- { "divide", LATIN1_divide, "/" }, +- { "eacute", LATIN1_eacute, "e'" }, +- { "ecirc", LATIN1_ecirc, "e^" }, +- { "egrave", LATIN1_egrave, "e`" }, +- { "empty", 0, "{}" }, +- { "epsilon", 0, "e" }, +- { "equiv", 0, "==" }, +- { "eta", 0, "e" }, +- { "eth", LATIN1_eth, "d-" }, +- { "euml", LATIN1_euml, "e\"" }, +- { "euro", 0, "EUR" }, +- { "exist", 0, "TE" }, +- { "fnof", 0, "f" }, +- { "forall", 0, "FA" }, +- { "frac12", LATIN1_frac12, " 1/2" }, +- { "frac14", LATIN1_frac14, " 1/4" }, +- { "frac34", LATIN1_frac34, " 3/4" }, +- { "frasl", 0, "/" }, +- { "gamma", 0, "g" }, +- { "ge", 0, ">=" }, +- { "gt", 0, ">" }, +- { "hArr", 0, "<=>" }, +- { "harr", 0, "<->" }, ++ { "divide", LATIN1_divide, "/", 0x00f7}, ++ { "eacute", LATIN1_eacute, "e'", 0x00e9}, ++ { "ecirc", LATIN1_ecirc, 
"e^", 0x00ea}, ++ { "egrave", LATIN1_egrave, "e`", 0x00e8}, ++ { "empty", 0, "{}", 0x2205}, ++ { "epsilon", 0, "e", 0x03b5}, ++ { "equiv", 0, "==", 0x2261}, ++ { "eta", 0, "e", 0x03b7}, ++ { "eth", LATIN1_eth, "d-", 0x00f0}, ++ { "euml", LATIN1_euml, "e\"", 0x00eb}, ++ { "euro", 0, "EUR", 0x20ac}, ++ { "exist", 0, "TE", 0x2203}, ++ { "fnof", 0, "f", 0x0192}, ++ { "forall", 0, "FA", 0x2200}, ++ { "frac12", LATIN1_frac12, " 1/2",0x00bd}, ++ { "frac14", LATIN1_frac14, " 1/4",0x00bc}, ++ { "frac34", LATIN1_frac34, " 3/4",0x00be}, ++ { "frasl", 0, "/" , 0x2044}, ++ { "gamma", 0, "g", 0x03b3}, ++ { "ge", 0, ">=", 0x2265}, ++ { "gt", 0, ">", 0x003e}, ++ { "hArr", 0, "<=>", 0x21d4}, ++ { "harr", 0, "<->", 0x2194}, + // { "hearts", 0, "[hearts]" }, +- { "hellip", 0, "..." }, +- { "iacute", LATIN1_iacute, "i'" }, +- { "icirc", LATIN1_icirc, "i^" }, +- { "iexcl", LATIN1_iexcl, "!" }, +- { "igrave", LATIN1_igrave, "i`" }, +- { "image", 0, "Im" }, +- { "infin", 0, "oo" }, +- { "int", 0, "INT" }, +- { "iota", 0, "i" }, +- { "iquest", LATIN1_iquest, "?" 
}, +- { "isin", 0, "(-" }, +- { "iuml", LATIN1_iuml, "i\"" }, +- { "kappa", 0, "k" }, +- { "lArr", 0, "<=" }, +- { "lambda", 0, "l" }, +- { "lang", 0, "</" }, +- { "laquo", LATIN1_laquo, "<<" }, +- { "larr", 0, "<-" }, ++ { "hellip", 0, "...", 0x2026}, ++ { "iacute", LATIN1_iacute, "i'", 0x00ed}, ++ { "icirc", LATIN1_icirc, "i^", 0x00ee}, ++ { "iexcl", LATIN1_iexcl, "!", 0x00a1}, ++ { "igrave", LATIN1_igrave, "i`", 0x00ec}, ++ { "image", 0, "Im", 0x2111}, ++ { "infin", 0, "oo", 0x221e}, ++ { "int", 0, "INT", 0x222b}, ++ { "iota", 0, "i", 0x03b9}, ++ { "iquest", LATIN1_iquest, "?", 0x00bf}, ++ { "isin", 0, "(-", 0x2208}, ++ { "iuml", LATIN1_iuml, "i\"", 0x00ef}, ++ { "kappa", 0, "k", 0x03ba}, ++ { "lArr", 0, "<=", 0x21d0}, ++ { "lambda", 0, "l", 0x03bb}, ++ { "lang", 0, "</", 0x2329}, ++ { "laquo", LATIN1_laquo, "<<", 0x00ab}, ++ { "larr", 0, "<-", 0x2190}, + // { "lceil", 0, "<|" }, +- { "ldquo", 0, "\"" }, +- { "le", 0, "<=" }, ++ { "ldquo", 0, "\"", 0x201c}, ++ { "le", 0, "<=", 0x2264}, + // { "lfloor", 0, "|<" }, +- { "lowast", 0, "*" }, +- { "loz", 0, "<>" }, +- { "lsaquo", 0, "<" }, +- { "lsquo", 0, "`" }, +- { "lt", 0, "<" }, +- { "macr", LATIN1_macr, "-" }, +- { "mdash", 0, "--" }, +- { "micro", LATIN1_micro, "my" }, +- { "middot", LATIN1_middot, "." 
}, +- { "minus", 0, "-" }, +- { "mu", 0, "m" }, +- { "nabla", 0, "Nabla" }, +- { "nbsp", LATIN1_nbsp, " " }, +- { "ndash", 0, "-" }, +- { "ne", 0, "!=" }, +- { "ni", 0, "-)" }, +- { "not", LATIN1_not, "NOT" }, +- { "notin", 0, "!(-" }, +- { "nsub", 0, "!(C" }, +- { "ntilde", LATIN1_ntilde, "n~" }, +- { "nu", 0, "n" }, +- { "oacute", LATIN1_oacute, "o'" }, +- { "ocirc", LATIN1_ocirc, "o^" }, +- { "oelig", 0, "oe" }, +- { "ograve", LATIN1_ograve, "o`" }, +- { "oline", LATIN1_macr, "-" }, +- { "omega", 0, "o" }, +- { "omicron", 0, "o" }, +- { "oplus", 0, "(+)" }, +- { "or", 0, "OR" }, +- { "ordf", LATIN1_ordf, "-a" }, +- { "ordm", LATIN1_ordm, "-o" }, +- { "oslash", LATIN1_oslash, "o/" }, +- { "otilde", LATIN1_otilde, "o~" }, +- { "otimes", 0, "(x)" }, +- { "ouml", LATIN1_ouml, "o\"" }, +- { "para", LATIN1_para, "P:" }, +- { "part", 0, "PART" }, +- { "permil", 0, " 0/00" }, +- { "perp", 0, "-T" }, +- { "phi", 0, "f" }, +- { "pi", 0, "p" }, +- { "piv", 0, "Pi" }, +- { "plusmn", LATIN1_plusmn, "+/-" }, +- { "pound", LATIN1_pound, "-L-" }, +- { "prime", 0, "'" }, +- { "prod", 0, "PROD" }, +- { "prop", 0, "0(" }, +- { "psi", 0, "ps" }, +- { "quot", 0, "\"" }, +- { "rArr", 0, "=>" }, +- { "radic", 0, "SQRT" }, +- { "rang", 0, "/>" }, +- { "raquo", LATIN1_raquo, ">>" }, +- { "rarr", 0, "->" }, ++ { "lowast", 0, "*", 0x2217}, ++ { "loz", 0, "<>", 0x25ca}, ++ { "lsaquo", 0, "<", 0x2039}, ++ { "lsquo", 0, "`", 0x2018}, ++ { "lt", 0, "<", 0x003c}, ++ { "macr", LATIN1_macr, "-", 0x00af}, ++ { "mdash", 0, "--", 0x2014}, ++ { "micro", LATIN1_micro, "my", 0x00b5}, ++ { "middot", LATIN1_middot, ".", 0x00b7}, ++ { "minus", 0, "-", 0x2212}, ++ { "mu", 0, "m", 0x03bc}, ++ { "nabla", 0, "Nabla",0x2207}, ++ { "nbsp", LATIN1_nbsp, " ", 0x00a0}, ++ { "ndash", 0, "-", 0x2013}, ++ { "ne", 0, "!=", 0x2260}, ++ { "ni", 0, "-)", 0x220b}, ++ { "not", LATIN1_not, "NOT", 0x00ac}, ++ { "notin", 0, "!(-", 0x2209}, ++ { "nsub", 0, "!(C", 0x2284}, ++ { "ntilde", LATIN1_ntilde, "n~", 0x00f1}, ++ { 
"nu", 0, "n", 0x03bd}, ++ { "oacute", LATIN1_oacute, "o'", 0x00f3}, ++ { "ocirc", LATIN1_ocirc, "o^", 0x00f4}, ++ { "oelig", 0, "oe", 0x0153}, ++ { "ograve", LATIN1_ograve, "o`", 0x00f2}, ++ { "oline", LATIN1_macr, "-", 0x203e}, ++ { "omega", 0, "o", 0x03c9}, ++ { "omicron", 0, "o", 0x03bf}, ++ { "oplus", 0, "(+)", 0x2295}, ++ { "or", 0, "OR", 0x2228}, ++ { "ordf", LATIN1_ordf, "-a", 0x00aa}, ++ { "ordm", LATIN1_ordm, "-o", 0x00ba}, ++ { "oslash", LATIN1_oslash, "o/", 0x00f8}, ++ { "otilde", LATIN1_otilde, "o~", 0x00f5}, ++ { "otimes", 0, "(x)", 0x2297}, ++ { "ouml", LATIN1_ouml, "o\"", 0x00f6}, ++ { "para", LATIN1_para, "P:", 0x00b6}, ++ { "part", 0, "PART",0x2202}, ++ { "permil", 0, " 0/00",0x2030}, ++ { "perp", 0, "-T", 0x22a5}, ++ { "phi", 0, "f", 0x03c6}, ++ { "pi", 0, "p", 0x03c0}, ++ { "piv", 0, "Pi", 0x03d6}, ++ { "plusmn", LATIN1_plusmn, "+/-", 0x00b1}, ++ { "pound", LATIN1_pound, "-L-", 0x00a3}, ++ { "prime", 0, "'", 0x2032}, ++ { "prod", 0, "PROD",0x220f}, ++ { "prop", 0, "0(", 0x221d}, ++ { "psi", 0, "ps", 0x03c8}, ++ { "quot", 0, "\"", 0x0022}, ++ { "rArr", 0, "=>", 0x21d2}, ++ { "radic", 0, "SQRT",0x221a}, ++ { "rang", 0, "/>", 0x232a}, ++ { "raquo", LATIN1_raquo, ">>", 0x00bb}, ++ { "rarr", 0, "->", 0x2192}, + // { "rceil", 0, ">|" }, +- { "rdquo", 0, "\"" }, +- { "real", 0, "Re" }, +- { "reg", LATIN1_reg, "(R)" }, ++ { "rdquo", 0, "\"", 0x201d}, ++ { "real", 0, "Re", 0x211c}, ++ { "reg", LATIN1_reg, "(R)", 0x00ae}, + // { "rfloor", 0, "|>" }, +- { "rho", 0, "r" }, +- { "rsaquo", 0, ">" }, +- { "rsquo", 0, "'" }, +- { "sbquo", 0, "'" }, +- { "scaron", 0, "s" }, +- { "sdot", 0, "DOT" }, +- { "sect", LATIN1_sect, "S:" }, +- { "shy", LATIN1_shy, "" }, +- { "sigma", 0, "s" }, +- { "sigmaf", 0, "s" }, +- { "sim", 0, "~" }, ++ { "rho", 0, "r", 0x03c1}, ++ { "rsaquo", 0, ">", 0x203a}, ++ { "rsquo", 0, "'", 0x2019}, ++ { "sbquo", 0, "'", 0x201a}, ++ { "scaron", 0, "s", 0x0161}, ++ { "sdot", 0, "DOT", 0x22c5}, ++ { "sect", LATIN1_sect, "S:", 0x00a7}, ++ { 
"shy", LATIN1_shy, "", 0x00ad}, ++ { "sigma", 0, "s", 0x03c3}, ++ { "sigmaf", 0, "s", 0x03c2}, ++ { "sim", 0, "~", 0x223c}, + // { "spades", 0, "[spades]" }, +- { "sub", 0, "(C" }, +- { "sube", 0, "(_" }, +- { "sum", 0, "SUM" }, +- { "sup", 0, ")C" }, +- { "sup1", LATIN1_sup1, "^1" }, +- { "sup2", LATIN1_sup2, "^2" }, +- { "sup3", LATIN1_sup3, "^3" }, +- { "supe", 0, ")_" }, +- { "szlig", LATIN1_szlig, "ss" }, +- { "tau", 0, "t" }, +- { "there4", 0, ".:" }, +- { "theta", 0, "th" }, +- { "thorn", LATIN1_thorn, "th" }, +- { "tilde", 0, "~" }, +- { "times", LATIN1_times, "x" }, +- { "trade", 0, "[TM]" }, +- { "uArr", 0, "^^" }, +- { "uacute", LATIN1_uacute, "u'" }, +- { "uarr", 0, "^" }, +- { "ucirc", LATIN1_ucirc, "u^" }, +- { "ugrave", LATIN1_ugrave, "u`" }, +- { "uml", LATIN1_uml, "\"" }, +- { "upsilon", 0, "u" }, +- { "uuml", LATIN1_uuml, "u\"" }, +- { "weierp", 0, "P" }, +- { "xi", 0, "x" }, +- { "yacute", LATIN1_yacute, "y'" }, +- { "yen", LATIN1_yen, "YEN" }, +- { "yuml", LATIN1_yuml, "y\"" }, +- { "zeta", 0, "z" }, ++ { "sub", 0, "(C", 0x2282}, ++ { "sube", 0, "(_", 0x2286}, ++ { "sum", 0, "SUM", 0x2211}, ++ { "sup", 0, ")C", 0x2283}, ++ { "sup1", LATIN1_sup1, "^1", 0x00b9}, ++ { "sup2", LATIN1_sup2, "^2", 0x00b2}, ++ { "sup3", LATIN1_sup3, "^3", 0x00b3}, ++ { "supe", 0, ")_", 0x2287}, ++ { "szlig", LATIN1_szlig, "ss", 0x00df}, ++ { "tau", 0, "t", 0x03c4}, ++ { "there4", 0, ".:", 0x2234}, ++ { "theta", 0, "th", 0x03b8}, ++ { "thorn", LATIN1_thorn, "th", 0x00fe}, ++ { "tilde", 0, "~", 0x02dc}, ++ { "times", LATIN1_times, "x", 0x00d7}, ++ { "trade", 0, "[TM]",0x2122}, ++ { "uArr", 0, "^^", 0x21d1}, ++ { "uacute", LATIN1_uacute, "u'", 0x00fa}, ++ { "uarr", 0, "^", 0x2191}, ++ { "ucirc", LATIN1_ucirc, "u^", 0x00fb}, ++ { "ugrave", LATIN1_ugrave, "u`", 0x00f9}, ++ { "uml", LATIN1_uml, "\"", 0x00a8}, ++ { "upsilon", 0, "u", 0x03c5}, ++ { "uuml", LATIN1_uuml, "u\"", 0x00fc}, ++ { "weierp", 0, "P", 0x2118}, ++ { "xi", 0, "x", 0x03be}, ++ { "yacute", LATIN1_yacute, 
"y'", 0x00fd}, ++ { "yen", LATIN1_yen, "YEN", 0x00a5}, ++ { "yuml", LATIN1_yuml, "y\"", 0x00ff}, ++ { "zeta", 0, "z", 0x03b6}, + }; + +-extern int use_iso8859; ++extern int use_encoding; + + /* ------------------------------------------------------------------------- */ + ++char ubuf[4]; ++ ++char *mkutf(unsigned long x) ++{ ++ memset(ubuf, 0, 4); ++ if(x < 128) ubuf[0] = x; ++ else if(x < 0x800) { ++ ubuf[0] = (0xc0 | ((x >> 6) & 0x1f)); ++ ubuf[1] = (0x80 | (x & 0x3f)); ++ } ++ else { ++ ubuf[0] = (0xe0 | ((x >> 12) & 0x0f)); ++ ubuf[1] = (0x80 | ((x >> 6) & 0x3f)); ++ ubuf[2] = (0x80 | (x & 0x3f)); ++ } ++ return ubuf; ++} ++ + void + replace_sgml_entities(string *s) + { +@@ -330,9 +349,9 @@ replace_sgml_entities(string *s) + */ + while (j < l && s->at(j) != '&') ++j; + /* +- * We could convert high-bit chars to "é" here if use_iso8859 +- * is off, then let them be translated or not. Is the purpose of +- * !use_iso8859 to allow SGML entities to be seen, or to strongly ++ * We could convert high-bit chars to "é" here if USE_ASCII ++ * is on, then let them be translated or not. Is the purpose of ++ * USE_ASCII to allow SGML entities to be seen, or to strongly + * filter against high-ASCII chars that might blow up a terminal + * that doesn't speak ISO8859? For the moment, "allow SGML entities + * to be seen" -- no filtering here. 
+@@ -370,7 +389,11 @@ replace_sgml_entities(string *s) + if (!isdigit(c)) break; + x = 10 * x + c - '0'; + } +- if (use_iso8859 || (x < 128)) { ++ if (USE_UTF8) { ++ s->replace(beg, j - beg, mkutf(x)); ++ j = beg + 1; ++ } ++ else if (USE_ISO8859 && (x < 256) || USE_ASCII && (x < 128)) { + s->replace(beg, j - beg, 1, (char) x); + j = beg + 1; + } else { +@@ -408,13 +431,17 @@ replace_sgml_entities(string *s) + (int (*)(const void *, const void *)) strcmp + ); + if (entity != NULL) { +- if (use_iso8859 && entity->iso8859code) { ++ if (USE_ISO8859 && entity->iso8859code) { + s->replace(beg, j - beg, 1, (char) entity->iso8859code); + j = beg + 1; +- } else if (entity->asciistr) { ++ } else if (USE_ASCII && entity->asciistr) { + s->replace(beg, j - beg, entity->asciistr); + j = beg + 1; + } /* else don't replace it at all, we don't have a translation */ ++ else if(USE_UTF8 && entity->unicode) { ++ s->replace(beg, j - beg, mkutf(entity->unicode)); ++ j = beg + 1; ++ } + } + } else { + ; /* EXTENSION: Allow literal '&' sometimes. 
*/ diff -Nrua -x CVS /usr/ports/converters/html2text/patches/patch-table_C ./patches/patch-table_C --- /usr/ports/converters/html2text/patches/patch-table_C Thu Jan 1 01:00:00 1970 +++ ./patches/patch-table_C Sat Jan 21 20:58:11 2017 @@ -0,0 +1,64 @@ +$OpenBSD$ +--- table.C.orig Mon Jul 22 13:32:50 2002 ++++ table.C Sat Jan 21 20:57:57 2017 +@@ -122,14 +122,14 @@ create_lcs( + "LEFT", Area::LEFT, + "CENTER", Area::CENTER, + "RIGHT", Area::RIGHT, +- 0 ++ NULL + ); + int row_valign = get_attribute( + row.attributes.get(), "VALIGN", Area::MIDDLE, + "TOP", Area::LEFT, + "MIDDLE", Area::MIDDLE, + "BOTTOM", Area::BOTTOM, +- 0 ++ NULL + ); + + const list<auto_ptr<TableCell> > &cl(*row.cells); +@@ -158,14 +158,14 @@ create_lcs( + "LEFT", Area::LEFT, + "CENTER", Area::CENTER, + "RIGHT", Area::RIGHT, +- 0 ++ NULL + ); + p->valign = get_attribute( + cell.attributes.get(), "VALIGN", row_valign, + "TOP", Area::TOP, + "MIDDLE", Area::MIDDLE, + "BOTTOM", Area::BOTTOM, +- 0 ++ NULL + ); + { + auto_ptr<Area> tmp(cell.format( +@@ -175,7 +175,7 @@ create_lcs( + - (*number_of_columns_return - 1) * (column_spacing + 0), + Area::LEFT // Yields better results than "p->halign"! + )); +- p->width = tmp.get() ? tmp->width() : 0; ++ p->width = tmp.get() ? tmp->utf_width() : 0; + } + p->minimized = false; + +@@ -308,7 +308,7 @@ narrow_table( + left_of_column + old_column_width - 1, + Area::LEFT // Yields better results than "lc.halign"! 
+ )); +- w = tmp->width(); ++ w = tmp->utf_width(); + if (w >= left_of_column + old_column_width) lc.minimized = true; + } + if (w > left_of_column + new_column_width) { +@@ -386,7 +386,7 @@ Table::format(Area::size_type w, int halign) const + "LEFT", Area::LEFT, + "CENTER", Area::CENTER, + "RIGHT", Area::RIGHT, +- 0 ++ NULL + ); + + // <TABLE> => default => no border diff -Nrua -x CVS /usr/ports/converters/html2text/pkg/PLIST ./pkg/PLIST --- /usr/ports/converters/html2text/pkg/PLIST Sat Nov 22 13:06:22 2014 +++ ./pkg/PLIST Sat Jan 21 21:45:17 2017 @@ -1,5 +1,5 @@ @comment $OpenBSD: PLIST,v 1.3 2014/11/22 12:06:22 landry Exp $ -bin/html2text +@bin bin/html2text @man man/man1/html2text.1 @man man/man5/html2textrc.5 share/doc/html2text/