Yes, the Python parser collected all the links. For Nim, a wrapper over lxml would be needed — lxml is one
of the best XML/HTML parsers.
import requests
import lxml.html

# Page whose "document" div is scraped for links.
main_page = "http://old.minjust.gov.ua/19612"

session = requests.session()
# Use update() instead of assignment so requests' default headers are
# extended, not discarded. The long User-Agent / Accept values are written
# with implicit string-literal concatenation (the original paste broke them
# across lines, which was a syntax error).
session.headers.update({
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": ("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/53.0.2785.101 "
                   "Safari/537.36 OPR/40.0.2308.62"),
    "Accept": ("text/html,application/xhtml+xml,application/xml;"
               "q=0.9,image/webp,*/*;q=0.8"),
    "DNT": "1",
    "Referer": main_page,
    "Accept-Encoding": "gzip, deflate, lzma, sdch",
    "Accept-Language": "ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4",
})

page = session.get(main_page)
parser = lxml.html.fromstring(page.text)
anchors = parser.cssselect('div.document a')

# enumerate() replaces the manual cnt=0 / cnt+=1 counter; the closing "]"
# was missing from the original format string.
for cnt, a in enumerate(anchors, 1):
    print("[{}|{}|{}]".format(cnt, a.attrib["href"], a.text))