Hello everybody,

I'm building a web crawler.

Started with python but the need for speed led me to libcurl.

The obstacle I'm having is: when using the multi interface, a large share of the connections close with the error "Could not resolve host:"

What I've tried:

1 - Confirmed that all URLs are correct.

2 - Confirmed that using the easy interface URLs are resolved.

3 - Confirmed that other programs like dig, resolvectl, nslookup can resolve URLs that multi interface couldn't.

4 - Set a "reputable" DNS server doing: curl_easy_setopt(handles[i], CURLOPT_DNS_SERVERS, "8.8.8.8");.

5 - In the file /etc/nsswitch.conf, line "hosts: files nDNS[NOTFOUNDED=return] dns" I deleted  [NOTFOUNDED=return].

6 - I've turned verbose on - Doesn't help me much.

I don't know what else to try, as it seems to be a problem inside the library, so I posted here before digging into the library.


System info:

O.S. - Ubuntu 22.04.3 LTS

Hardware - HP HP Pavilion Laptop 14-ce3xxx

Processor - Intel® Core™ i7-1065G7 CPU @ 1.30GHz × 8

libcurl version - 7.81.0


Attached is the source code file I'm using to debug this problem.


Best Regards,

Luis Figueira

/* ------------------------------------------
   DESCRIÇÃO

   Crawler de todos os sites de TLD .pt
   Detectar todas as diferentes estruturas de html (tipos de pagina web) 
        Não usar HTML parser para não abrandar o crawl?
        Usar apenas os characteres que seguem o "<"
   Guardar o URL de todas essas pagina

   -------------------------------------------*/
#include <curl/curl.h>
#include <stdio.h>
#include <time.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/sysinfo.h>
#include <string.h>
#include <search.h>
#include <sys/queue.h>

/* Number of entries in the seeds[] table; each crawler thread creates one
   easy handle per entry. */
#define NUM_SEEDS 600
/* Number of entries in the HEADERS[] table below. */
#define NUM_HEADERS 11

/*
 * Request header lines copied into the curl_slist that is attached to every
 * easy handle through CURLOPT_HTTPHEADER.
 *
 * Media-range parameters in "Accept" must be separated from the type with
 * ';' (e.g. "application/xml;q=0.9"); a space there makes the header
 * malformed.
 */
const char * const HEADERS[NUM_HEADERS] = {
   "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
   "Accept-Encoding: gzip, br",
   "Accept-Language: pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7",
   "Device-Memory: 8",
   "Downlink: 100",
   "Sec-Ch-Ua: \"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"",
   "Sec-Ch-Ua-Arch: \"x86\"",
   "Sec-Ch-Ua-Platform: \"Linux\"",
   "Connection: keep-alive",
   "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
   "Viewport-Width: 1920"};

/*
 * The 200 distinct seed URLs, as a comma-separated initializer fragment.
 * seeds[] below expands this list three times, so the table holds the same
 * 600 entries, in the same order, as a fully written-out list would.
 */
#define SEED_URLS \
   "https://melhores-sites.pt", \
   "https://pt.trustpilot.com", \
   "https://ccm.marinha.pt", \
   "https://www.natgeo.pt", \
   "https://www.zoo.pt", \
   "https://arcadenoe.pt", \
   "https://revistajardins.pt", \
   "https://www.meganimal.pt", \
   "https://www.miscota.pt", \
   "https://www.zooplus.pt", \
   "https://www.tiendanimal.pt", \
   "https://www.kiwoko.pt", \
   "https://www.petoutlet.pt", \
   "https://europlantas.pt", \
   "https://www.pinterest.pt", \
   "https://www.ipma.pt", \
   "https://www.google.pt", \
   "https://www.teleculinaria.pt", \
   "https://www.vaqueiro.pt", \
   "https://www.pingodoce.pt", \
   "https://lifestyle.sapo.pt", \
   "https://feed.continente.pt", \
   "https://saboreiaavida.nestle.pt", \
   "https://www.ivdp.pt", \
   "https://www.revistadevinhos.pt", \
   "https://www.vinhosdoalentejo.pt", \
   "https://www.drinksco.pt", \
   "https://www.cascawines.pt", \
   "https://www.clubevinhosportugueses.pt", \
   "https://www.onwine.pt", \
   "https://www.vinha.pt", \
   "https://www.garrafeirasoares.pt", \
   "https://wine.pt", \
   "https://estadoliquido.pt", \
   "https://receitasangola.blogspot.pt", \
   "https://www.apn.org.pt", \
   "https://www.thefork.pt", \
   "https://nutrimento.pt", \
   "https://www.arodadaalimentacao.pt", \
   "https://www.nit.pt", \
   "https://www.viversaudavel.pt", \
   "https://media.rtp.pt", \
   "https://sic.pt", \
   "https://www.daamaoafloresta.pt", \
   "https://www.radiomiudos.pt", \
   "https://bebes.kazulo.pt", \
   "https://canalpanda.pt", \
   "https://tv.disney.pt", \
   "https://www.nickelodeon.pt", \
   "https://estrelaseouricos.sapo.pt", \
   "https://www.cartoonnetwork.pt", \
   "https://www.coronakids.pt", \
   "https://poki.pt", \
   "https://www.1001jogos.pt", \
   "https://www.ajudadeberco.pt", \
   "https://www.cnpdpcj.gov.pt", \
   "https://www.apfn.com.pt", \
   "https://bledina.pt", \
   "https://nutriben.pt", \
   "https://empresa.nestle.pt", \
   "https://www.imaginarium.pt", \
   "https://www.chicco.pt", \
   "https://www.toysrus.pt", \
   "https://www.disneylandparis.pt", \
   "https://www.oceanario.pt", \
   "https://www.kidzania.pt", \
   "https://lisboastorycentre.pt", \
   "https://www.zoomarine.pt", \
   "https://arquitectos.pt", \
   "https://www.lnec.pt", \
   "https://www.ordemengenheiros.pt", \
   "https://www.oet.pt", \
   "https://www.plus500.pt", \
   "https://www.cmvm.pt", \
   "https://www.investirnabolsa.pt", \
   "https://www.deco.proteste.pt", \
   "https://www.jornaldenegocios.pt", \
   "https://www.rankia.pt", \
   "https://observador.pt", \
   "https://www.rbe.mec.pt", \
   "https://cvc.instituto-camoes.pt", \
   "https://bndigital.bnportugal.gov.pt", \
   "https://www.b-on.pt", \
   "https://www.priberam.pt", \
   "https://www.infopedia.pt", \
   "https://www.dgeste.mec.pt", \
   "https://www.escolavirtual.pt", \
   "https://ensina.rtp.pt", \
   "https://www.seguranet.pt", \
   "https://www.universia.pt", \
   "https://www.forum.pt", \
   "https://ciberduvidas.iscte-iul.pt", \
   "https://orientacao-vocacional.com.pt", \
   "https://www.educare.pt", \
   "https://www.dge.mec.pt", \
   "https://www.dges.gov.pt", \
   "https://www.igec.mec.pt", \
   "https://portaldasmatriculas.edu.gov.pt", \
   "https://www.dgeec.mec.pt", \
   "https://www.pnl2027.gov.pt", \
   "https://www.gee.gov.pt", \
   "https://www.portoeditora.pt", \
   "https://iave.pt", \
   "https://www.fenprof.pt", \
   "https://fne.pt", \
   "https://www.sepleu.pt", \
   "https://sindep.pt", \
   "https://www.sipe.pt", \
   "https://www.spliu.pt", \
   "https://www.aspl.webhs.pt", \
   "https://www.spzn.pt", \
   "https://www.spzs.pt", \
   "https://www.spzc.pt", \
   "https://www.sdpa.pt", \
   "https://anprofessores.pt", \
   "https://aspl.webhs.pt", \
   "https://apei.pt", \
   "https://manuaisescolares.pt", \
   "https://dislexia.pt", \
   "https://hiperatividade.com.pt", \
   "https://www.portalbullying.com.pt", \
   "https://www.timeout.pt", \
   "https://www.viagogo.pt", \
   "https://www.agendalx.pt", \
   "https://www.guiadacidade.pt", \
   "https://viva-porto.pt", \
   "https://www.cnc.pt", \
   "https://www.culturgest.pt", \
   "https://www.e-cultura.pt", \
   "https://eventosemportugal.pt", \
   "https://www.tripadvisor.pt", \
   "https://www.ccb.pt", \
   "https://gulbenkian.pt", \
   "https://www.serralves.pt", \
   "https://arena.altice.pt", \
   "https://www.coliseu.pt", \
   "https://cae.pt", \
   "https://www.bol.pt", \
   "https://bilheteira.fnac.pt", \
   "https://ticketline.sapo.pt", \
   "https://www.acad-ciencias.pt", \
   "https://www.fct.pt", \
   "https://www.pavconhecimento.pt", \
   "https://www.cienciaviva.pt", \
   "https://oal.ul.pt", \
   "https://www.rcaap.pt", \
   "https://scholar.google.pt", \
   "https://www.museus.ulisboa.pt", \
   "https://www.abarth.pt", \
   "https://www.alfaromeo.pt", \
   "https://www.audi.pt", \
   "https://www.chevrolet.pt", \
   "https://www.bmw.pt", \
   "https://www.citroen.pt", \
   "https://www.cupraofficial.pt", \
   "https://www.dacia.pt", \
   "https://www.fiat.pt", \
   "https://www.honda.pt", \
   "https://www.ford.pt", \
   "https://www.hyundai.pt", \
   "https://kia.pt", \
   "https://www.lexus.pt", \
   "https://www.mazda.pt", \
   "https://www.mercedes-benz.pt", \
   "https://www.mini.pt", \
   "https://www.mitsubishi-motors.pt", \
   "https://www.opel.pt", \
   "https://www.nissan.pt", \
   "https://www.peugeot.pt", \
   "https://www.renault.pt", \
   "https://www.seat.pt", \
   "https://www.skoda-auto.pt", \
   "https://www.suzuki.pt", \
   "https://www.toyota.pt", \
   "https://www.volkswagen.pt", \
   "https://autoportal.iol.pt", \
   "https://www.turbo.pt", \
   "https://www.tuning.online.pt", \
   "https://www.velocidades.pt", \
   "https://www.circuito-estoril.pt", \
   "https://volantesic.pt", \
   "https://www.avis.com.pt", \
   "https://www.budget.com.pt", \
   "https://www.turiscar.pt", \
   "https://www.autoeurope.pt", \
   "https://www.skyscanner.pt", \
   "https://auto.sapo.pt", \
   "https://www.autousados.pt", \
   "https://www.olx.pt", \
   "https://www.piscapisca.pt", \
   "https://www.autocompraevenda.pt", \
   "https://www.autouncle.pt", \
   "https://www.precoscombustiveis.dgeg.pt", \
   "https://servicos.infraestruturasdeportugal.pt", \
   "https://www.viaverde.pt", \
   "https://www.brisa.pt", \
   "https://www.acp.pt", \
   "https://www.eparkempresas.pt", \
   "https://cinemas.nos.pt", \
   "https://www.ucicinemas.pt"

/*
 * Start URLs for the crawl: the 200 URLs of SEED_URLS repeated three times
 * (3 x 200 == NUM_SEEDS). Entry i is used for easy handle i in each crawler
 * thread.
 */
const char * const seeds[NUM_SEEDS] = {
   SEED_URLS,
   SEED_URLS,
   SEED_URLS
};

/* Running total of body bytes seen; incremented by writeCallback(). It is a
   plain size_t with no synchronization around that update. */
size_t bytes_downloaded = 0;

/* Head of the request-header list; filled by create_linked_list_headers()
   and handed to every easy handle via CURLOPT_HTTPHEADER. */
struct curl_slist *headers = NULL;

/* Not referenced by any function in the visible source. */
int transfers = 0;


//-------------------------------------
// Function Definitions
//-------------------------------------

/*
 * Print the elapsed time between two clock_t readings and the resulting
 * response rate.
 *
 * start, end: tick counts in CLOCKS_PER_SEC units.
 *
 * When the interval is zero or negative the rate is reported as 0 instead of
 * dividing by zero (converting the resulting infinity to int is undefined).
 */
void print_time_duration(clock_t start, clock_t end)
{
   /* NOTE(review): the rate is computed from a fixed count of 100 responses,
      not from the number of transfers actually performed - confirm intent. */
   const int responses_counted = 100;

   double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
   int responses_p_sec = 0;

   if (cpu_time_used > 0.0) {
      responses_p_sec = (int)(responses_counted / cpu_time_used);
   }

   printf(" [ LUIS ] - The execution duration was %f seconds \n", cpu_time_used);
   printf(" [ LUIS ] - %d responses per second\n", responses_p_sec);
}


/*
 * Append every entry of HEADERS[] to the global `headers` list.
 *
 * Returns 0 when all NUM_HEADERS entries were appended, -1 if an append
 * failed. On failure the partially built list is freed and `headers` is
 * reset to NULL.
 */
int create_linked_list_headers(void)
{
   for (int i = 0; i < NUM_HEADERS; i++) {
      /* curl_slist_append() returns NULL on failure and leaves the existing
         list untouched, so keep the old head until the append succeeded;
         assigning the result straight to `headers` would leak the list. */
      struct curl_slist *grown = curl_slist_append(headers, HEADERS[i]);
      if (grown == NULL) {
         curl_slist_free_all(headers);
         headers = NULL;
         fprintf(stderr,
            " [ LUIS ] - Error filling the list of headers with header %s\n",HEADERS[i]);
         return -1;
      }
      headers = grown;
   }
   return 0;
}


/*
 * Join the first `cores` threads stored in `tid`, in index order.
 *
 * A line is written to stderr for every thread: "terminated" when
 * pthread_join() succeeded, or the error number it returned otherwise.
 */
void wait_for_threads(pthread_t *tid, int cores)
{
   for (int i = 0; i < cores; i++) {
      int join_error = pthread_join(tid[i], NULL);
      if (join_error != 0) {
         /* Do not claim the thread terminated when the join itself failed. */
         fprintf(stderr, " [ LUIS ] - Couldn't join thread %d, error %d\n", i, join_error);
         continue;
      }
      fprintf(stderr, " [ LUIS ] - Thread %d terminated\n", i);
   }
}


/*
 * Log every completed transfer currently queued on the multi handle.
 * Drains the whole message queue: several transfers can finish during a
 * single curl_multi_perform() call.
 */
static void log_finished_transfers(CURLM *multi_handle, FILE *log)
{
   int msgs_left = 0;
   CURLMsg *msg;

   while ((msg = curl_multi_info_read(multi_handle, &msgs_left)) != NULL) {
      if (msg->msg != CURLMSG_DONE)
         continue;

      /* CURLINFO_EFFECTIVE_URL needs the address of a char pointer; the
         string it yields belongs to the easy handle and must not be freed. */
      char *url = NULL;
      curl_easy_getinfo(msg->easy_handle, CURLINFO_EFFECTIVE_URL, &url);
      fprintf(log, " [ LUIS ] - Transfer finished for URL:%s - result %d (%s)\n",
              url ? url : "(unknown)",
              (int)msg->data.result,
              curl_easy_strerror(msg->data.result));
   }
}

/*
 * Drive all transfers attached to `multi_handle` until none is running.
 *
 * multi_handle: multi handle with the easy handles already added.
 * file:         log stream for per-transfer results; stderr is used when it
 *               is NULL.
 *
 * Returns 0 when every transfer ran to completion (successfully or not),
 * -1 when curl_multi_perform() or curl_multi_poll() reported an error.
 *
 * The easy handles are neither removed from the multi handle nor cleaned up
 * here: multi_crawl_thread() removes and cleans up every handle after this
 * function returns, so releasing them here as well would free them twice.
 */
int transfer_control_loop(CURLM *multi_handle,FILE * file)
{
   FILE *log = file ? file : stderr;
   int still_running = 1;

   while (still_running) {
      /* still_running receives the number of transfers not yet finished. */
      CURLMcode multi_error = curl_multi_perform(multi_handle, &still_running);
      if (multi_error != CURLM_OK) {
         fprintf(log, " [ LUIS ] - curl_multi_perform failed: %s\n",
                 curl_multi_strerror(multi_error));
         return -1;
      }

      log_finished_transfers(multi_handle, log);

      if (still_running) {
         /* Sleep until there is socket activity, at most 1000 ms. */
         multi_error = curl_multi_poll(multi_handle, NULL, 0, 1000, NULL);
         if (multi_error != CURLM_OK) {
            fprintf(log, " [ LUIS ] - curl_multi_poll failed: %s\n",
                    curl_multi_strerror(multi_error));
            return -1;
         }
      }
   }
   return 0;
}


/*
 * libcurl write callback (CURLOPT_WRITEFUNCTION signature): adds the chunk
 * size to the global bytes_downloaded counter, reports the counter on stderr
 * and copies the chunk to stdout.
 *
 * ptr:      received data; NOT NUL-terminated, exactly size * nmemb bytes.
 * userdata: unused.
 *
 * Returns size * nmemb so libcurl treats the whole chunk as consumed.
 *
 * No CURLOPT_WRITEFUNCTION registration of this callback is visible in this
 * file. If it is installed on handles driven from several threads, the
 * update of bytes_downloaded below is unsynchronized.
 */
static size_t
writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata)
{
   size_t chunk_len = size * nmemb;
   (void)userdata;

   bytes_downloaded += chunk_len;
   /* NOTE(review): the value printed is the byte count divided by 4 although
      the label says "Bytes downloaded" - confirm the intended unit. */
   fprintf(stderr," [ LUIS ] - Bytes downloaded: %zu\n",
            bytes_downloaded / 4 );

   /* The buffer is not a C string: write exactly chunk_len bytes instead of
      printing with "%s", which would read past the end of the chunk. */
   fwrite(ptr, 1, chunk_len, stdout);
   return chunk_len;
}


static void * multi_crawl_thread(void *arg)
{
   // Declarar o vector de handles e a handle multi
   CURL *handles[NUM_SEEDS];
   CURLM *multi_handle;
   int i;
   
   // Print thead ID
   pthread_t id = pthread_self();
   char id_string[128];
   sprintf(id_string,"%lu",id);
   char *file_name = strncat(id_string,"_thread_log.txt",16);

   // Delete older files


   // Create a file for logs
   FILE * file = fopen(file_name,"w");
   if(file == NULL){
      printf(" [LUIS] - Error opening file %s",file_name);
   }

   // Create handles and set options
   for(i = 0; i< NUM_SEEDS; i++){
      handles[i] = curl_easy_init();
      if(handles[i]){
         curl_easy_setopt(handles[i], CURLOPT_URL, seeds[i]);
         curl_easy_setopt(handles[i], CURLOPT_STDERR, file);
         curl_easy_setopt(handles[i], CURLOPT_VERBOSE, 1L);
         curl_easy_setopt(handles[i], CURLOPT_HTTPHEADER, headers);
         curl_easy_setopt(handles[i], CURLOPT_DNS_SERVERS, "8.8.8.8");
         // Adicionados para teste
         curl_easy_setopt(handles[i], CURLOPT_DNS_CACHE_TIMEOUT,0L );
         fprintf(file," [ LUIS ] - Created easy handle %d na thread %lu\n", i, id);
      }
   }
   printf(" [ LUIS ] - Thread %lu set up %d transfers\n",id,i);

   // Criar a multi_handle
   multi_handle = curl_multi_init();
   
   // Adicionar handles à stack multi
   for(i = 0; i < NUM_SEEDS; i++){
      curl_multi_add_handle(multi_handle, handles[i]);
   }

   // Transfer controll loop
   transfer_control_loop(multi_handle, file);
   printf(" [ LUIS ] - Depois do do transfer control loop\n");
   // Cleanup after execution
   // esta foma de cleanup não está correcta - Leva a core dump
   for(i = 0; i < NUM_SEEDS; i++){
      printf(" [ LUIS ] - Dentro do loop cleanup after execution\n");
      curl_multi_remove_handle(multi_handle, handles[i]);
      printf(" [ LUIS ] - Dentro loop cleanup after execution - Depois de remove handle do multi\n");
      
      curl_easy_cleanup(handles[i]);
      printf(" [ LUIS ] - Dentro loop cleanup after execution - Depois do easycleanup\n");
   }

   curl_multi_cleanup(multi_handle);
   printf(" [ LUIS ] - Depois do curl_multi_cleanup\n");
   return NULL;
}


/*
 * Start `cores` threads running multi_crawl_thread(), storing each thread id
 * in the matching slot of `tid`.
 *
 * A pthread_create() failure is reported on stderr and the loop moves on to
 * the next slot; this function does not write to the slot of a thread that
 * failed to start.
 */
void create_threads(pthread_t *tid, int cores)
{
   int idx = 0;

   while (idx < cores) {
      printf(" [ LUIS ] - dentro do loop q cria thereads\n");

      pthread_t *slot = tid + idx;
      int rc = pthread_create(slot, NULL, multi_crawl_thread, NULL);

      fprintf(stderr," [ LUIS ] - Depois de criado a %d thread com handle: %d  e erro:\n",idx,rc);
      if (rc != 0)
         fprintf(stderr, " [ LUIS ] - Couldn't run thread number %d, errno %d\n",idx,rc);

      idx++;
   }
}


int main()
{
   int cores = get_nprocs();
   printf(" [ LUIS ] - %d working cores on this machine!\n",cores);

   // Initializing thread-unsafe curl third-party libraries - Before anything
   curl_global_init(CURL_GLOBAL_ALL); 

   // Create global liked list of headers
   create_linked_list_headers();

   // Criar as threads
   pthread_t tid[cores];
   create_threads(tid, cores);
   clock_t start = clock();

   // Wait for all threads to terminate */
   wait_for_threads(tid,cores);
   clock_t end = clock();
   print_time_duration(start, end);

   curl_global_cleanup();

   return 0;
}
-- 
Unsubscribe: https://lists.haxx.se/mailman/listinfo/curl-library
Etiquette:   https://curl.se/mail/etiquette.html

Reply via email to