I cannot parse HTML found in the wild. **findall** doesn't return all of the elements from the div
element with the class _documents_.
import httpclient
import htmlparser
import tables
import strutils
import streams
import xmltree
import strtabs
# Script: download a page, parse it as HTML and print every link found in
# the first <div class="document"> as "index|href|text".

# Page that is fetched; also sent as the Referer header.
var main_page = "http://old.minjust.gov.ua/19612"

# Browser-like request headers sent with the GET request.
# NOTE(review): "Accept-Encoding" advertises compressed encodings; confirm the
# HTTP client decodes them, otherwise resp.body may not be plain HTML.
var headers_dict = {
  "Connection": "keep-alive",
  "Upgrade-Insecure-Requests": "1",
  # A Nim "..." literal cannot span lines, so the long value is concatenated.
  "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, " &
    "like Gecko) Chrome/53.0.2785.101 Safari/537.36 OPR/40.0.2308.62",
  "Accept":
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
  "DNT": "1",
  "Referer": main_page,
  "Accept-Encoding": "gzip, deflate, lzma, sdch",
  "Accept-Language": "ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4"
}.toTable

# Serialise the table into the raw "Key:Value\r\n" string that is passed to
# httpclient.get as the extra headers argument.
var headers = ""
for k, v in headers_dict:
  headers &= k & ":" & v & "\c\L"

var resp = httpclient.get(main_page, headers)
var stream = newStringStream(resp.body)
var html = htmlparser.parseHtml(stream)

# Running number of links printed so far.
var cnt = 0
for elem in html.findall("div"):
  if elem.attr("class") == "document":
    # NOTE(review): the page reportedly contains mis-nested markup such as
    # `<li><a ...>...</li></a>`; the parser may close enclosing elements early
    # there, so later links may not end up as descendants of this div - confirm.
    for a in elem.findall("a"):
      cnt += 1
      # attr() is used instead of attrs["href"] so that an anchor without
      # attributes (or without an href) does not abort the whole listing.
      echo cnt, "|", a.attr("href"), "|", a.innerText
    # Only the first matching div is processed.
    break
The count should be 809 instead of 327. The output ends at this element:
<li><a href="/file/1495">Крівова проти України - <b>19.12.2012</b></li></a>
This markup is malformed (the closing tags are mis-nested). But what can I do about it on my side? PS: Nim version 14.