http://git-wip-us.apache.org/repos/asf/incubator-senssoft-tap/blob/6a81d1e7/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_ihatexml.py
----------------------------------------------------------------------
diff --git a/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_ihatexml.py 
b/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_ihatexml.py
deleted file mode 100644
index d6d1d6f..0000000
--- a/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_ihatexml.py
+++ /dev/null
@@ -1,288 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-import warnings
-
-from .constants import DataLossWarning
-
-baseChar = """
-[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
-[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
-[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
-[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
-[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
-[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
-[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
-[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
-[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
-[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
-[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
-[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
-[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
-[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
-[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
-[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
-[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
-[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
-[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
-[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
-[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
-[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
-[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
-[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
-[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
-[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
-[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
-[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
-[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
-[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
-#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
-#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
-#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
-[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
-[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
-#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
-[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
-[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
-[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
-[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
-[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
-#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
-[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
-[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
-[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
-[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
-
-ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
-
-combiningCharacter = """
-[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
-[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
-[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
-[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
-#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
-[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
-[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
-#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
-[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
-[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
-#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
-[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
-[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
-[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
-[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
-[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
-#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
-[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
-#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
-[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
-[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
-#x3099 | #x309A"""
-
-digit = """
-[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
-[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
-[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
-[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
-
-extender = """
-#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
-#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
-
-letter = " | ".join([baseChar, ideographic])
-
-# Without the
-name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
-                   extender])
-nameFirst = " | ".join([letter, "_"])
-
-reChar = re.compile(r"#x([\d|A-F]{4,4})")
-reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
-
-
-def charStringToList(chars):
-    charRanges = [item.strip() for item in chars.split(" | ")]
-    rv = []
-    for item in charRanges:
-        foundMatch = False
-        for regexp in (reChar, reCharRange):
-            match = regexp.match(item)
-            if match is not None:
-                rv.append([hexToInt(item) for item in match.groups()])
-                if len(rv[-1]) == 1:
-                    rv[-1] = rv[-1] * 2
-                foundMatch = True
-                break
-        if not foundMatch:
-            assert len(item) == 1
-
-            rv.append([ord(item)] * 2)
-    rv = normaliseCharList(rv)
-    return rv
-
-
-def normaliseCharList(charList):
-    charList = sorted(charList)
-    for item in charList:
-        assert item[1] >= item[0]
-    rv = []
-    i = 0
-    while i < len(charList):
-        j = 1
-        rv.append(charList[i])
-        while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
-            rv[-1][1] = charList[i + j][1]
-            j += 1
-        i += j
-    return rv
-
-# We don't really support characters above the BMP :(
-max_unicode = int("FFFF", 16)
-
-
-def missingRanges(charList):
-    rv = []
-    if charList[0] != 0:
-        rv.append([0, charList[0][0] - 1])
-    for i, item in enumerate(charList[:-1]):
-        rv.append([item[1] + 1, charList[i + 1][0] - 1])
-    if charList[-1][1] != max_unicode:
-        rv.append([charList[-1][1] + 1, max_unicode])
-    return rv
-
-
-def listToRegexpStr(charList):
-    rv = []
-    for item in charList:
-        if item[0] == item[1]:
-            rv.append(escapeRegexp(chr(item[0])))
-        else:
-            rv.append(escapeRegexp(chr(item[0])) + "-" +
-                      escapeRegexp(chr(item[1])))
-    return "[%s]" % "".join(rv)
-
-
-def hexToInt(hex_str):
-    return int(hex_str, 16)
-
-
-def escapeRegexp(string):
-    specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
-                         "[", "]", "|", "(", ")", "-")
-    for char in specialCharacters:
-        string = string.replace(char, "\\" + char)
-
-    return string
-
-# output from the above
-nonXmlNameBMPRegexp = 
re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-
 
\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0
 
f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
  # noqa
-
-nonXmlNameFirstBMPRegexp = 
re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0
 
bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u30
 2a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')  
# noqa
-
-# Simpler things
-nonPubidCharRegexp = 
re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
-
-
-class InfosetFilter(object):
-    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
-
-    def __init__(self,
-                 dropXmlnsLocalName=False,
-                 dropXmlnsAttrNs=False,
-                 preventDoubleDashComments=False,
-                 preventDashAtCommentEnd=False,
-                 replaceFormFeedCharacters=True,
-                 preventSingleQuotePubid=False):
-
-        self.dropXmlnsLocalName = dropXmlnsLocalName
-        self.dropXmlnsAttrNs = dropXmlnsAttrNs
-
-        self.preventDoubleDashComments = preventDoubleDashComments
-        self.preventDashAtCommentEnd = preventDashAtCommentEnd
-
-        self.replaceFormFeedCharacters = replaceFormFeedCharacters
-
-        self.preventSingleQuotePubid = preventSingleQuotePubid
-
-        self.replaceCache = {}
-
-    def coerceAttribute(self, name, namespace=None):
-        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
-            warnings.warn("Attributes cannot begin with xmlns", 
DataLossWarning)
-            return None
-        elif (self.dropXmlnsAttrNs and
-              namespace == "http://www.w3.org/2000/xmlns/";):
-            warnings.warn("Attributes cannot be in the xml namespace", 
DataLossWarning)
-            return None
-        else:
-            return self.toXmlName(name)
-
-    def coerceElement(self, name):
-        return self.toXmlName(name)
-
-    def coerceComment(self, data):
-        if self.preventDoubleDashComments:
-            while "--" in data:
-                warnings.warn("Comments cannot contain adjacent dashes", 
DataLossWarning)
-                data = data.replace("--", "- -")
-            if data.endswith("-"):
-                warnings.warn("Comments cannot end in a dash", DataLossWarning)
-                data += " "
-        return data
-
-    def coerceCharacters(self, data):
-        if self.replaceFormFeedCharacters:
-            for _ in range(data.count("\x0C")):
-                warnings.warn("Text cannot contain U+000C", DataLossWarning)
-            data = data.replace("\x0C", " ")
-        # Other non-xml characters
-        return data
-
-    def coercePubid(self, data):
-        dataOutput = data
-        for char in nonPubidCharRegexp.findall(data):
-            warnings.warn("Coercing non-XML pubid", DataLossWarning)
-            replacement = self.getReplacementCharacter(char)
-            dataOutput = dataOutput.replace(char, replacement)
-        if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
-            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
-            dataOutput = dataOutput.replace("'", 
self.getReplacementCharacter("'"))
-        return dataOutput
-
-    def toXmlName(self, name):
-        nameFirst = name[0]
-        nameRest = name[1:]
-        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
-        if m:
-            warnings.warn("Coercing non-XML name", DataLossWarning)
-            nameFirstOutput = self.getReplacementCharacter(nameFirst)
-        else:
-            nameFirstOutput = nameFirst
-
-        nameRestOutput = nameRest
-        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
-        for char in replaceChars:
-            warnings.warn("Coercing non-XML name", DataLossWarning)
-            replacement = self.getReplacementCharacter(char)
-            nameRestOutput = nameRestOutput.replace(char, replacement)
-        return nameFirstOutput + nameRestOutput
-
-    def getReplacementCharacter(self, char):
-        if char in self.replaceCache:
-            replacement = self.replaceCache[char]
-        else:
-            replacement = self.escapeChar(char)
-        return replacement
-
-    def fromXmlName(self, name):
-        for item in set(self.replacementRegexp.findall(name)):
-            name = name.replace(item, self.unescapeChar(item))
-        return name
-
-    def escapeChar(self, char):
-        replacement = "U%05X" % ord(char)
-        self.replaceCache[char] = replacement
-        return replacement
-
-    def unescapeChar(self, charcode):
-        return chr(int(charcode[1:], 16))

http://git-wip-us.apache.org/repos/asf/incubator-senssoft-tap/blob/6a81d1e7/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.py
----------------------------------------------------------------------
diff --git 
a/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.py 
b/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.py
deleted file mode 100644
index 7c5639f..0000000
--- a/env2/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.py
+++ /dev/null
@@ -1,923 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from pip._vendor.six import text_type, binary_type
-from pip._vendor.six.moves import http_client, urllib
-
-import codecs
-import re
-
-from pip._vendor import webencodings
-
-from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
-from .constants import ReparseException
-from . import _utils
-
-from io import StringIO
-
-try:
-    from io import BytesIO
-except ImportError:
-    BytesIO = StringIO
-
-# Non-unicode versions of constants for use in the pre-parser
-spaceCharactersBytes = frozenset([item.encode("ascii") for item in 
spaceCharacters])
-asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
-asciiUppercaseBytes = frozenset([item.encode("ascii") for item in 
asciiUppercase])
-spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
-
-
-invalid_unicode_no_surrogate = 
"[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"
  # noqa
-
-if _utils.supports_lone_surrogates:
-    # Use one extra step of indirection and create surrogates with
-    # eval. Not using this indirection would introduce an illegal
-    # unicode literal on platforms not supporting such lone
-    # surrogates.
-    assert invalid_unicode_no_surrogate[-1] == "]" and 
invalid_unicode_no_surrogate.count("]") == 1
-    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
-                                    eval('"\\uD800-\\uDFFF"') +  # 
pylint:disable=eval-used
-                                    "]")
-else:
-    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
-
-non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
-                                  0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
-                                  0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
-                                  0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
-                                  0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
-                                  0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
-                                  0x10FFFE, 0x10FFFF])
-
-ascii_punctuation_re = 
re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
-
-# Cache for charsUntil()
-charsUntilRegEx = {}
-
-
-class BufferedStream(object):
-    """Buffering for streams that do not have buffering of their own
-
-    The buffer is implemented as a list of chunks on the assumption that
-    joining many strings will be slow since it is O(n**2)
-    """
-
-    def __init__(self, stream):
-        self.stream = stream
-        self.buffer = []
-        self.position = [-1, 0]  # chunk number, offset
-
-    def tell(self):
-        pos = 0
-        for chunk in self.buffer[:self.position[0]]:
-            pos += len(chunk)
-        pos += self.position[1]
-        return pos
-
-    def seek(self, pos):
-        assert pos <= self._bufferedBytes()
-        offset = pos
-        i = 0
-        while len(self.buffer[i]) < offset:
-            offset -= len(self.buffer[i])
-            i += 1
-        self.position = [i, offset]
-
-    def read(self, bytes):
-        if not self.buffer:
-            return self._readStream(bytes)
-        elif (self.position[0] == len(self.buffer) and
-              self.position[1] == len(self.buffer[-1])):
-            return self._readStream(bytes)
-        else:
-            return self._readFromBuffer(bytes)
-
-    def _bufferedBytes(self):
-        return sum([len(item) for item in self.buffer])
-
-    def _readStream(self, bytes):
-        data = self.stream.read(bytes)
-        self.buffer.append(data)
-        self.position[0] += 1
-        self.position[1] = len(data)
-        return data
-
-    def _readFromBuffer(self, bytes):
-        remainingBytes = bytes
-        rv = []
-        bufferIndex = self.position[0]
-        bufferOffset = self.position[1]
-        while bufferIndex < len(self.buffer) and remainingBytes != 0:
-            assert remainingBytes > 0
-            bufferedData = self.buffer[bufferIndex]
-
-            if remainingBytes <= len(bufferedData) - bufferOffset:
-                bytesToRead = remainingBytes
-                self.position = [bufferIndex, bufferOffset + bytesToRead]
-            else:
-                bytesToRead = len(bufferedData) - bufferOffset
-                self.position = [bufferIndex, len(bufferedData)]
-                bufferIndex += 1
-            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
-            remainingBytes -= bytesToRead
-
-            bufferOffset = 0
-
-        if remainingBytes:
-            rv.append(self._readStream(remainingBytes))
-
-        return b"".join(rv)
-
-
-def HTMLInputStream(source, **kwargs):
-    # Work around Python bug #20007: read(0) closes the connection.
-    # http://bugs.python.org/issue20007
-    if (isinstance(source, http_client.HTTPResponse) or
-        # Also check for addinfourl wrapping HTTPResponse
-        (isinstance(source, urllib.response.addbase) and
-         isinstance(source.fp, http_client.HTTPResponse))):
-        isUnicode = False
-    elif hasattr(source, "read"):
-        isUnicode = isinstance(source.read(0), text_type)
-    else:
-        isUnicode = isinstance(source, text_type)
-
-    if isUnicode:
-        encodings = [x for x in kwargs if x.endswith("_encoding")]
-        if encodings:
-            raise TypeError("Cannot set an encoding with a unicode input, set 
%r" % encodings)
-
-        return HTMLUnicodeInputStream(source, **kwargs)
-    else:
-        return HTMLBinaryInputStream(source, **kwargs)
-
-
-class HTMLUnicodeInputStream(object):
-    """Provides a unicode stream of characters to the HTMLTokenizer.
-
-    This class takes care of character encoding and removing or replacing
-    incorrect byte-sequences and also provides column and line tracking.
-
-    """
-
-    _defaultChunkSize = 10240
-
-    def __init__(self, source):
-        """Initialises the HTMLInputStream.
-
-        HTMLInputStream(source, [encoding]) -> Normalized stream from source
-        for use by html5lib.
-
-        source can be either a file-object, local filename or a string.
-
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
-
-        """
-
-        if not _utils.supports_lone_surrogates:
-            # Such platforms will have already checked for such
-            # surrogate errors, so no need to do this checking.
-            self.reportCharacterErrors = None
-        elif len("\U0010FFFF") == 1:
-            self.reportCharacterErrors = self.characterErrorsUCS4
-        else:
-            self.reportCharacterErrors = self.characterErrorsUCS2
-
-        # List of where new lines occur
-        self.newLines = [0]
-
-        self.charEncoding = (lookupEncoding("utf-8"), "certain")
-        self.dataStream = self.openStream(source)
-
-        self.reset()
-
-    def reset(self):
-        self.chunk = ""
-        self.chunkSize = 0
-        self.chunkOffset = 0
-        self.errors = []
-
-        # number of (complete) lines in previous chunks
-        self.prevNumLines = 0
-        # number of columns in the last line of the previous chunk
-        self.prevNumCols = 0
-
-        # Deal with CR LF and surrogates split over chunk boundaries
-        self._bufferedCharacter = None
-
-    def openStream(self, source):
-        """Produces a file object from source.
-
-        source can be either a file object, local filename or a string.
-
-        """
-        # Already a file object
-        if hasattr(source, 'read'):
-            stream = source
-        else:
-            stream = StringIO(source)
-
-        return stream
-
-    def _position(self, offset):
-        chunk = self.chunk
-        nLines = chunk.count('\n', 0, offset)
-        positionLine = self.prevNumLines + nLines
-        lastLinePos = chunk.rfind('\n', 0, offset)
-        if lastLinePos == -1:
-            positionColumn = self.prevNumCols + offset
-        else:
-            positionColumn = offset - (lastLinePos + 1)
-        return (positionLine, positionColumn)
-
-    def position(self):
-        """Returns (line, col) of the current position in the stream."""
-        line, col = self._position(self.chunkOffset)
-        return (line + 1, col)
-
-    def char(self):
-        """ Read one character from the stream or queue if available. Return
-            EOF when EOF is reached.
-        """
-        # Read a new chunk from the input stream if necessary
-        if self.chunkOffset >= self.chunkSize:
-            if not self.readChunk():
-                return EOF
-
-        chunkOffset = self.chunkOffset
-        char = self.chunk[chunkOffset]
-        self.chunkOffset = chunkOffset + 1
-
-        return char
-
-    def readChunk(self, chunkSize=None):
-        if chunkSize is None:
-            chunkSize = self._defaultChunkSize
-
-        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
-
-        self.chunk = ""
-        self.chunkSize = 0
-        self.chunkOffset = 0
-
-        data = self.dataStream.read(chunkSize)
-
-        # Deal with CR LF and surrogates broken across chunks
-        if self._bufferedCharacter:
-            data = self._bufferedCharacter + data
-            self._bufferedCharacter = None
-        elif not data:
-            # We have no more data, bye-bye stream
-            return False
-
-        if len(data) > 1:
-            lastv = ord(data[-1])
-            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
-                self._bufferedCharacter = data[-1]
-                data = data[:-1]
-
-        if self.reportCharacterErrors:
-            self.reportCharacterErrors(data)
-
-        # Replace invalid characters
-        data = data.replace("\r\n", "\n")
-        data = data.replace("\r", "\n")
-
-        self.chunk = data
-        self.chunkSize = len(data)
-
-        return True
-
-    def characterErrorsUCS4(self, data):
-        for _ in range(len(invalid_unicode_re.findall(data))):
-            self.errors.append("invalid-codepoint")
-
-    def characterErrorsUCS2(self, data):
-        # Someone picked the wrong compile option
-        # You lose
-        skip = False
-        for match in invalid_unicode_re.finditer(data):
-            if skip:
-                continue
-            codepoint = ord(match.group())
-            pos = match.start()
-            # Pretty sure there should be endianness issues here
-            if _utils.isSurrogatePair(data[pos:pos + 2]):
-                # We have a surrogate pair!
-                char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
-                if char_val in non_bmp_invalid_codepoints:
-                    self.errors.append("invalid-codepoint")
-                skip = True
-            elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
-                  pos == len(data) - 1):
-                self.errors.append("invalid-codepoint")
-            else:
-                skip = False
-                self.errors.append("invalid-codepoint")
-
-    def charsUntil(self, characters, opposite=False):
-        """ Returns a string of characters from the stream up to but not
-        including any character in 'characters' or EOF. 'characters' must be
-        a container that supports the 'in' method and iteration over its
-        characters.
-        """
-
-        # Use a cache of regexps to find the required characters
-        try:
-            chars = charsUntilRegEx[(characters, opposite)]
-        except KeyError:
-            if __debug__:
-                for c in characters:
-                    assert(ord(c) < 128)
-            regex = "".join(["\\x%02x" % ord(c) for c in characters])
-            if not opposite:
-                regex = "^%s" % regex
-            chars = charsUntilRegEx[(characters, opposite)] = 
re.compile("[%s]+" % regex)
-
-        rv = []
-
-        while True:
-            # Find the longest matching prefix
-            m = chars.match(self.chunk, self.chunkOffset)
-            if m is None:
-                # If nothing matched, and it wasn't because we ran out of 
chunk,
-                # then stop
-                if self.chunkOffset != self.chunkSize:
-                    break
-            else:
-                end = m.end()
-                # If not the whole chunk matched, return everything
-                # up to the part that didn't match
-                if end != self.chunkSize:
-                    rv.append(self.chunk[self.chunkOffset:end])
-                    self.chunkOffset = end
-                    break
-            # If the whole remainder of the chunk matched,
-            # use it all and read the next chunk
-            rv.append(self.chunk[self.chunkOffset:])
-            if not self.readChunk():
-                # Reached EOF
-                break
-
-        r = "".join(rv)
-        return r
-
-    def unget(self, char):
-        # Only one character is allowed to be ungotten at once - it must
-        # be consumed again before any further call to unget
-        if char is not None:
-            if self.chunkOffset == 0:
-                # unget is called quite rarely, so it's a good idea to do
-                # more work here if it saves a bit of work in the frequently
-                # called char and charsUntil.
-                # So, just prepend the ungotten character onto the current
-                # chunk:
-                self.chunk = char + self.chunk
-                self.chunkSize += 1
-            else:
-                self.chunkOffset -= 1
-                assert self.chunk[self.chunkOffset] == char
-
-
-class HTMLBinaryInputStream(HTMLUnicodeInputStream):
-    """Provides a unicode stream of characters to the HTMLTokenizer.
-
-    This class takes care of character encoding and removing or replacing
-    incorrect byte-sequences and also provides column and line tracking.
-
-    """
-
-    def __init__(self, source, override_encoding=None, transport_encoding=None,
-                 same_origin_parent_encoding=None, likely_encoding=None,
-                 default_encoding="windows-1252", useChardet=True):
-        """Initialises the HTMLInputStream.
-
-        HTMLInputStream(source, [encoding]) -> Normalized stream from source
-        for use by html5lib.
-
-        source can be either a file-object, local filename or a string.
-
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
-
-        """
-        # Raw Stream - for unicode objects this will encode to utf-8 and set
-        #              self.charEncoding as appropriate
-        self.rawStream = self.openStream(source)
-
-        HTMLUnicodeInputStream.__init__(self, self.rawStream)
-
-        # Encoding Information
-        # Number of bytes to use when looking for a meta element with
-        # encoding information
-        self.numBytesMeta = 1024
-        # Number of bytes to use when using detecting encoding using chardet
-        self.numBytesChardet = 100
-        # Things from args
-        self.override_encoding = override_encoding
-        self.transport_encoding = transport_encoding
-        self.same_origin_parent_encoding = same_origin_parent_encoding
-        self.likely_encoding = likely_encoding
-        self.default_encoding = default_encoding
-
-        # Determine encoding
-        self.charEncoding = self.determineEncoding(useChardet)
-        assert self.charEncoding[0] is not None
-
-        # Call superclass
-        self.reset()
-
-    def reset(self):
-        self.dataStream = 
self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
-        HTMLUnicodeInputStream.reset(self)
-
-    def openStream(self, source):
-        """Produces a file object from source.
-
-        source can be either a file object, local filename or a string.
-
-        """
-        # Already a file object
-        if hasattr(source, 'read'):
-            stream = source
-        else:
-            stream = BytesIO(source)
-
-        try:
-            stream.seek(stream.tell())
-        except:  # pylint:disable=bare-except
-            stream = BufferedStream(stream)
-
-        return stream
-
-    def determineEncoding(self, chardet=True):
-        # BOMs take precedence over everything
-        # This will also read past the BOM if present
-        charEncoding = self.detectBOM(), "certain"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # If we've been overriden, we've been overriden
-        charEncoding = lookupEncoding(self.override_encoding), "certain"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Now check the transport layer
-        charEncoding = lookupEncoding(self.transport_encoding), "certain"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Look for meta elements with encoding information
-        charEncoding = self.detectEncodingMeta(), "tentative"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Parent document encoding
-        charEncoding = lookupEncoding(self.same_origin_parent_encoding), 
"tentative"
-        if charEncoding[0] is not None and not 
charEncoding[0].name.startswith("utf-16"):
-            return charEncoding
-
-        # "likely" encoding
-        charEncoding = lookupEncoding(self.likely_encoding), "tentative"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Guess with chardet, if available
-        if chardet:
-            try:
-                from chardet.universaldetector import UniversalDetector
-            except ImportError:
-                pass
-            else:
-                buffers = []
-                detector = UniversalDetector()
-                while not detector.done:
-                    buffer = self.rawStream.read(self.numBytesChardet)
-                    assert isinstance(buffer, bytes)
-                    if not buffer:
-                        break
-                    buffers.append(buffer)
-                    detector.feed(buffer)
-                detector.close()
-                encoding = lookupEncoding(detector.result['encoding'])
-                self.rawStream.seek(0)
-                if encoding is not None:
-                    return encoding, "tentative"
-
-        # Try the default encoding
-        charEncoding = lookupEncoding(self.default_encoding), "tentative"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Fallback to html5lib's default if even that hasn't worked
-        return lookupEncoding("windows-1252"), "tentative"
-
-    def changeEncoding(self, newEncoding):
-        assert self.charEncoding[1] != "certain"
-        newEncoding = lookupEncoding(newEncoding)
-        if newEncoding is None:
-            return
-        if newEncoding.name in ("utf-16be", "utf-16le"):
-            newEncoding = lookupEncoding("utf-8")
-            assert newEncoding is not None
-        elif newEncoding == self.charEncoding[0]:
-            self.charEncoding = (self.charEncoding[0], "certain")
-        else:
-            self.rawStream.seek(0)
-            self.charEncoding = (newEncoding, "certain")
-            self.reset()
-            raise ReparseException("Encoding changed from %s to %s" % 
(self.charEncoding[0], newEncoding))
-
-    def detectBOM(self):
-        """Attempts to detect at BOM at the start of the stream. If
-        an encoding can be determined from the BOM return the name of the
-        encoding otherwise return None"""
-        bomDict = {
-            codecs.BOM_UTF8: 'utf-8',
-            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
-            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
-        }
-
-        # Go to beginning of file and read in 4 bytes
-        string = self.rawStream.read(4)
-        assert isinstance(string, bytes)
-
-        # Try detecting the BOM using bytes from the string
-        encoding = bomDict.get(string[:3])         # UTF-8
-        seek = 3
-        if not encoding:
-            # Need to detect UTF-32 before UTF-16
-            encoding = bomDict.get(string)         # UTF-32
-            seek = 4
-            if not encoding:
-                encoding = bomDict.get(string[:2])  # UTF-16
-                seek = 2
-
-        # Set the read position past the BOM if one was found, otherwise
-        # set it to the start of the stream
-        if encoding:
-            self.rawStream.seek(seek)
-            return lookupEncoding(encoding)
-        else:
-            self.rawStream.seek(0)
-            return None
-
-    def detectEncodingMeta(self):
-        """Report the encoding declared by the meta element
-        """
-        buffer = self.rawStream.read(self.numBytesMeta)
-        assert isinstance(buffer, bytes)
-        parser = EncodingParser(buffer)
-        self.rawStream.seek(0)
-        encoding = parser.getEncoding()
-
-        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
-            encoding = lookupEncoding("utf-8")
-
-        return encoding
-
-
-class EncodingBytes(bytes):
-    """String-like object with an associated position and various extra methods
-    If the position is ever greater than the string length then an exception is
-    raised"""
-    def __new__(self, value):
-        assert isinstance(value, bytes)
-        return bytes.__new__(self, value.lower())
-
-    def __init__(self, value):
-        # pylint:disable=unused-argument
-        self._position = -1
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        p = self._position = self._position + 1
-        if p >= len(self):
-            raise StopIteration
-        elif p < 0:
-            raise TypeError
-        return self[p:p + 1]
-
-    def next(self):
-        # Py2 compat
-        return self.__next__()
-
-    def previous(self):
-        p = self._position
-        if p >= len(self):
-            raise StopIteration
-        elif p < 0:
-            raise TypeError
-        self._position = p = p - 1
-        return self[p:p + 1]
-
-    def setPosition(self, position):
-        if self._position >= len(self):
-            raise StopIteration
-        self._position = position
-
-    def getPosition(self):
-        if self._position >= len(self):
-            raise StopIteration
-        if self._position >= 0:
-            return self._position
-        else:
-            return None
-
-    position = property(getPosition, setPosition)
-
-    def getCurrentByte(self):
-        return self[self.position:self.position + 1]
-
-    currentByte = property(getCurrentByte)
-
-    def skip(self, chars=spaceCharactersBytes):
-        """Skip past a list of characters"""
-        p = self.position               # use property for the error-checking
-        while p < len(self):
-            c = self[p:p + 1]
-            if c not in chars:
-                self._position = p
-                return c
-            p += 1
-        self._position = p
-        return None
-
-    def skipUntil(self, chars):
-        p = self.position
-        while p < len(self):
-            c = self[p:p + 1]
-            if c in chars:
-                self._position = p
-                return c
-            p += 1
-        self._position = p
-        return None
-
-    def matchBytes(self, bytes):
-        """Look for a sequence of bytes at the start of a string. If the bytes
-        are found return True and advance the position to the byte after the
-        match. Otherwise return False and leave the position alone"""
-        p = self.position
-        data = self[p:p + len(bytes)]
-        rv = data.startswith(bytes)
-        if rv:
-            self.position += len(bytes)
-        return rv
-
-    def jumpTo(self, bytes):
-        """Look for the next sequence of bytes matching a given sequence. If
-        a match is found advance the position to the last byte of the match"""
-        newPosition = self[self.position:].find(bytes)
-        if newPosition > -1:
-            # XXX: This is ugly, but I can't see a nicer way to fix this.
-            if self._position == -1:
-                self._position = 0
-            self._position += (newPosition + len(bytes) - 1)
-            return True
-        else:
-            raise StopIteration
-
-
-class EncodingParser(object):
-    """Mini parser for detecting character encoding from meta elements"""
-
-    def __init__(self, data):
-        """string - the data to work on for encoding detection"""
-        self.data = EncodingBytes(data)
-        self.encoding = None
-
-    def getEncoding(self):
-        methodDispatch = (
-            (b"<!--", self.handleComment),
-            (b"<meta", self.handleMeta),
-            (b"</", self.handlePossibleEndTag),
-            (b"<!", self.handleOther),
-            (b"<?", self.handleOther),
-            (b"<", self.handlePossibleStartTag))
-        for _ in self.data:
-            keepParsing = True
-            for key, method in methodDispatch:
-                if self.data.matchBytes(key):
-                    try:
-                        keepParsing = method()
-                        break
-                    except StopIteration:
-                        keepParsing = False
-                        break
-            if not keepParsing:
-                break
-
-        return self.encoding
-
-    def handleComment(self):
-        """Skip over comments"""
-        return self.data.jumpTo(b"-->")
-
-    def handleMeta(self):
-        if self.data.currentByte not in spaceCharactersBytes:
-            # if we have <meta not followed by a space so just keep going
-            return True
-        # We have a valid meta element we want to search for attributes
-        hasPragma = False
-        pendingEncoding = None
-        while True:
-            # Try to find the next attribute after the current position
-            attr = self.getAttribute()
-            if attr is None:
-                return True
-            else:
-                if attr[0] == b"http-equiv":
-                    hasPragma = attr[1] == b"content-type"
-                    if hasPragma and pendingEncoding is not None:
-                        self.encoding = pendingEncoding
-                        return False
-                elif attr[0] == b"charset":
-                    tentativeEncoding = attr[1]
-                    codec = lookupEncoding(tentativeEncoding)
-                    if codec is not None:
-                        self.encoding = codec
-                        return False
-                elif attr[0] == b"content":
-                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
-                    tentativeEncoding = contentParser.parse()
-                    if tentativeEncoding is not None:
-                        codec = lookupEncoding(tentativeEncoding)
-                        if codec is not None:
-                            if hasPragma:
-                                self.encoding = codec
-                                return False
-                            else:
-                                pendingEncoding = codec
-
-    def handlePossibleStartTag(self):
-        return self.handlePossibleTag(False)
-
-    def handlePossibleEndTag(self):
-        next(self.data)
-        return self.handlePossibleTag(True)
-
-    def handlePossibleTag(self, endTag):
-        data = self.data
-        if data.currentByte not in asciiLettersBytes:
-            # If the next byte is not an ascii letter either ignore this
-            # fragment (possible start tag case) or treat it according to
-            # handleOther
-            if endTag:
-                data.previous()
-                self.handleOther()
-            return True
-
-        c = data.skipUntil(spacesAngleBrackets)
-        if c == b"<":
-            # return to the first step in the overall "two step" algorithm
-            # reprocessing the < byte
-            data.previous()
-        else:
-            # Read all attributes
-            attr = self.getAttribute()
-            while attr is not None:
-                attr = self.getAttribute()
-        return True
-
-    def handleOther(self):
-        return self.data.jumpTo(b">")
-
-    def getAttribute(self):
-        """Return a name,value pair for the next attribute in the stream,
-        if one is found, or None"""
-        data = self.data
-        # Step 1 (skip chars)
-        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
-        assert c is None or len(c) == 1
-        # Step 2
-        if c in (b">", None):
-            return None
-        # Step 3
-        attrName = []
-        attrValue = []
-        # Step 4 attribute name
-        while True:
-            if c == b"=" and attrName:
-                break
-            elif c in spaceCharactersBytes:
-                # Step 6!
-                c = data.skip()
-                break
-            elif c in (b"/", b">"):
-                return b"".join(attrName), b""
-            elif c in asciiUppercaseBytes:
-                attrName.append(c.lower())
-            elif c is None:
-                return None
-            else:
-                attrName.append(c)
-            # Step 5
-            c = next(data)
-        # Step 7
-        if c != b"=":
-            data.previous()
-            return b"".join(attrName), b""
-        # Step 8
-        next(data)
-        # Step 9
-        c = data.skip()
-        # Step 10
-        if c in (b"'", b'"'):
-            # 10.1
-            quoteChar = c
-            while True:
-                # 10.2
-                c = next(data)
-                # 10.3
-                if c == quoteChar:
-                    next(data)
-                    return b"".join(attrName), b"".join(attrValue)
-                # 10.4
-                elif c in asciiUppercaseBytes:
-                    attrValue.append(c.lower())
-                # 10.5
-                else:
-                    attrValue.append(c)
-        elif c == b">":
-            return b"".join(attrName), b""
-        elif c in asciiUppercaseBytes:
-            attrValue.append(c.lower())
-        elif c is None:
-            return None
-        else:
-            attrValue.append(c)
-        # Step 11
-        while True:
-            c = next(data)
-            if c in spacesAngleBrackets:
-                return b"".join(attrName), b"".join(attrValue)
-            elif c in asciiUppercaseBytes:
-                attrValue.append(c.lower())
-            elif c is None:
-                return None
-            else:
-                attrValue.append(c)
-
-
-class ContentAttrParser(object):
-    def __init__(self, data):
-        assert isinstance(data, bytes)
-        self.data = data
-
-    def parse(self):
-        try:
-            # Check if the attr name is charset
-            # otherwise return
-            self.data.jumpTo(b"charset")
-            self.data.position += 1
-            self.data.skip()
-            if not self.data.currentByte == b"=":
-                # If there is no = sign keep looking for attrs
-                return None
-            self.data.position += 1
-            self.data.skip()
-            # Look for an encoding between matching quote marks
-            if self.data.currentByte in (b'"', b"'"):
-                quoteMark = self.data.currentByte
-                self.data.position += 1
-                oldPosition = self.data.position
-                if self.data.jumpTo(quoteMark):
-                    return self.data[oldPosition:self.data.position]
-                else:
-                    return None
-            else:
-                # Unquoted value
-                oldPosition = self.data.position
-                try:
-                    self.data.skipUntil(spaceCharactersBytes)
-                    return self.data[oldPosition:self.data.position]
-                except StopIteration:
-                    # Return the whole remaining value
-                    return self.data[oldPosition:]
-        except StopIteration:
-            return None
-
-
-def lookupEncoding(encoding):
-    """Return the python codec name corresponding to an encoding or None if the
-    string doesn't correspond to a valid encoding."""
-    if isinstance(encoding, binary_type):
-        try:
-            encoding = encoding.decode("ascii")
-        except UnicodeDecodeError:
-            return None
-
-    if encoding is not None:
-        try:
-            return webencodings.lookup(encoding)
-        except AttributeError:
-            return None
-    else:
-        return None

Reply via email to