Hello everybody,
I'm building a web crawler.
Started with python but the need for speed led me to libcurl.
The obstacle I'm having is: when using the multi interface, a large share of
the connections close because of the error "Could not resolve host:"
What I've tried:
1 - Confirmed that all URLs are correct.
2 - Confirmed that using the easy interface URLs are resolved.
3 - Confirmed that other programs like dig, resolvectl, nslookup can
resolve URLs that multi interface couldn't.
4 - Set a "reputable" DNS server doing: curl_easy_setopt(handles[i],
CURLOPT_DNS_SERVERS, "8.8.8.8");.
5 - In the file /etc/nsswitch.conf, line "hosts: files
nDNS[NOTFOUNDED=return] dns" I deleted [NOTFOUNDED=return].
6 - I've turned verbose on - Doesn't help me much.
I don't know what else to try, as it seems to be a problem inside the library,
so I am posting here before digging into the library itself.
System info:
O.S. - Ubuntu 22.04.3 LTS
Hardware - HP HP Pavilion Laptop 14-ce3xxx
Processor - Intel® Core™ i7-1065G7 CPU @ 1.30GHz × 8
libcurl version - 7.81.0
Attached is the source code file I'm using to debug this problem.
Best Regards,
Luis Figueira
/* ------------------------------------------
DESCRIPTION
Crawler for all sites under the .pt TLD
Detect all the different HTML structures (types of web page)
Avoid using an HTML parser so the crawl is not slowed down?
Use only the characters that follow the "<"
Store the URL of every one of those pages
-------------------------------------------*/
#include <curl/curl.h>
#include <stdio.h>
#include <time.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/sysinfo.h>
#include <string.h>
#include <search.h>
#include <sys/queue.h>
/* Number of entries in the seeds[] URL table. */
#define NUM_SEEDS 600
/* Number of entries in the HEADERS[] table below. */
#define NUM_HEADERS 11
/*
 * Request headers sent with every transfer (browser-like profile).
 *
 * The Accept value uses ";q=" for every quality parameter; without the
 * semicolon the media range is syntactically invalid.
 *
 * NOTE(review): "Accept-Encoding" is sent as a raw header here. libcurl only
 * decodes compressed bodies automatically when CURLOPT_ACCEPT_ENCODING is
 * set, so responses may arrive still compressed - confirm this is intended.
 */
const char * const HEADERS[NUM_HEADERS] = {
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding: gzip, br",
    "Accept-Language: pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7",
    "Device-Memory: 8",
    "Downlink: 100",
    "Sec-Ch-Ua: \"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"",
    "Sec-Ch-Ua-Arch: \"x86\"",
    "Sec-Ch-Ua-Platform: \"Linux\"",
    "Connection: keep-alive",
    "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
    "Viewport-Width: 1920"};
const char * const seeds[NUM_SEEDS] = {
"https://melhores-sites.pt",
"https://pt.trustpilot.com",
"https://ccm.marinha.pt",
"https://www.natgeo.pt",
"https://www.zoo.pt",
"https://arcadenoe.pt",
"https://revistajardins.pt",
"https://www.meganimal.pt",
"https://www.miscota.pt",
"https://www.zooplus.pt",
"https://www.tiendanimal.pt",
"https://www.kiwoko.pt",
"https://www.petoutlet.pt",
"https://europlantas.pt",
"https://www.pinterest.pt",
"https://www.ipma.pt",
"https://www.google.pt",
"https://www.teleculinaria.pt",
"https://www.vaqueiro.pt",
"https://www.pingodoce.pt",
"https://lifestyle.sapo.pt",
"https://feed.continente.pt",
"https://saboreiaavida.nestle.pt",
"https://www.ivdp.pt",
"https://www.revistadevinhos.pt",
"https://www.vinhosdoalentejo.pt",
"https://www.drinksco.pt",
"https://www.cascawines.pt",
"https://www.clubevinhosportugueses.pt",
"https://www.onwine.pt",
"https://www.vinha.pt",
"https://www.garrafeirasoares.pt",
"https://wine.pt",
"https://estadoliquido.pt",
"https://receitasangola.blogspot.pt",
"https://www.apn.org.pt",
"https://www.thefork.pt",
"https://nutrimento.pt",
"https://www.arodadaalimentacao.pt",
"https://www.nit.pt",
"https://www.viversaudavel.pt",
"https://media.rtp.pt",
"https://sic.pt",
"https://www.daamaoafloresta.pt",
"https://www.radiomiudos.pt",
"https://bebes.kazulo.pt",
"https://canalpanda.pt",
"https://tv.disney.pt",
"https://www.nickelodeon.pt",
"https://estrelaseouricos.sapo.pt",
"https://www.cartoonnetwork.pt",
"https://www.coronakids.pt",
"https://poki.pt",
"https://www.1001jogos.pt",
"https://www.ajudadeberco.pt",
"https://www.cnpdpcj.gov.pt",
"https://www.apfn.com.pt",
"https://bledina.pt",
"https://nutriben.pt",
"https://empresa.nestle.pt",
"https://www.imaginarium.pt",
"https://www.chicco.pt",
"https://www.toysrus.pt",
"https://www.disneylandparis.pt",
"https://www.oceanario.pt",
"https://www.kidzania.pt",
"https://lisboastorycentre.pt",
"https://www.zoomarine.pt",
"https://arquitectos.pt",
"https://www.lnec.pt",
"https://www.ordemengenheiros.pt",
"https://www.oet.pt",
"https://www.plus500.pt",
"https://www.cmvm.pt",
"https://www.investirnabolsa.pt",
"https://www.deco.proteste.pt",
"https://www.jornaldenegocios.pt",
"https://www.rankia.pt",
"https://observador.pt",
"https://www.rbe.mec.pt",
"https://cvc.instituto-camoes.pt",
"https://bndigital.bnportugal.gov.pt",
"https://www.b-on.pt",
"https://www.priberam.pt",
"https://www.infopedia.pt",
"https://www.dgeste.mec.pt",
"https://www.escolavirtual.pt",
"https://ensina.rtp.pt",
"https://www.seguranet.pt",
"https://www.universia.pt",
"https://www.forum.pt",
"https://ciberduvidas.iscte-iul.pt",
"https://orientacao-vocacional.com.pt",
"https://www.educare.pt",
"https://www.dge.mec.pt",
"https://www.dges.gov.pt",
"https://www.igec.mec.pt",
"https://portaldasmatriculas.edu.gov.pt",
"https://www.dgeec.mec.pt",
"https://www.pnl2027.gov.pt",
"https://www.gee.gov.pt",
"https://www.portoeditora.pt",
"https://iave.pt",
"https://www.fenprof.pt",
"https://fne.pt",
"https://www.sepleu.pt",
"https://sindep.pt",
"https://www.sipe.pt",
"https://www.spliu.pt",
"https://www.aspl.webhs.pt",
"https://www.spzn.pt",
"https://www.spzs.pt",
"https://www.spzc.pt",
"https://www.sdpa.pt",
"https://anprofessores.pt",
"https://aspl.webhs.pt",
"https://apei.pt",
"https://manuaisescolares.pt",
"https://dislexia.pt",
"https://hiperatividade.com.pt",
"https://www.portalbullying.com.pt",
"https://www.timeout.pt",
"https://www.viagogo.pt",
"https://www.agendalx.pt",
"https://www.guiadacidade.pt",
"https://viva-porto.pt",
"https://www.cnc.pt",
"https://www.culturgest.pt",
"https://www.e-cultura.pt",
"https://eventosemportugal.pt",
"https://www.tripadvisor.pt",
"https://www.ccb.pt",
"https://gulbenkian.pt",
"https://www.serralves.pt",
"https://arena.altice.pt",
"https://www.coliseu.pt",
"https://cae.pt",
"https://www.bol.pt",
"https://bilheteira.fnac.pt",
"https://ticketline.sapo.pt",
"https://www.acad-ciencias.pt",
"https://www.fct.pt",
"https://www.pavconhecimento.pt",
"https://www.cienciaviva.pt",
"https://oal.ul.pt",
"https://www.rcaap.pt",
"https://scholar.google.pt",
"https://www.museus.ulisboa.pt",
"https://www.abarth.pt",
"https://www.alfaromeo.pt",
"https://www.audi.pt",
"https://www.chevrolet.pt",
"https://www.bmw.pt",
"https://www.citroen.pt",
"https://www.cupraofficial.pt",
"https://www.dacia.pt",
"https://www.fiat.pt",
"https://www.honda.pt",
"https://www.ford.pt",
"https://www.hyundai.pt",
"https://kia.pt",
"https://www.lexus.pt",
"https://www.mazda.pt",
"https://www.mercedes-benz.pt",
"https://www.mini.pt",
"https://www.mitsubishi-motors.pt",
"https://www.opel.pt",
"https://www.nissan.pt",
"https://www.peugeot.pt",
"https://www.renault.pt",
"https://www.seat.pt",
"https://www.skoda-auto.pt",
"https://www.suzuki.pt",
"https://www.toyota.pt",
"https://www.volkswagen.pt",
"https://autoportal.iol.pt",
"https://www.turbo.pt",
"https://www.tuning.online.pt",
"https://www.velocidades.pt",
"https://www.circuito-estoril.pt",
"https://volantesic.pt",
"https://www.avis.com.pt",
"https://www.budget.com.pt",
"https://www.turiscar.pt",
"https://www.autoeurope.pt",
"https://www.skyscanner.pt",
"https://auto.sapo.pt",
"https://www.autousados.pt",
"https://www.olx.pt",
"https://www.piscapisca.pt",
"https://www.autocompraevenda.pt",
"https://www.autouncle.pt",
"https://www.precoscombustiveis.dgeg.pt",
"https://servicos.infraestruturasdeportugal.pt",
"https://www.viaverde.pt",
"https://www.brisa.pt",
"https://www.acp.pt",
"https://www.eparkempresas.pt",
"https://cinemas.nos.pt",
"https://www.ucicinemas.pt",
"https://melhores-sites.pt",
"https://pt.trustpilot.com",
"https://ccm.marinha.pt",
"https://www.natgeo.pt",
"https://www.zoo.pt",
"https://arcadenoe.pt",
"https://revistajardins.pt",
"https://www.meganimal.pt",
"https://www.miscota.pt",
"https://www.zooplus.pt",
"https://www.tiendanimal.pt",
"https://www.kiwoko.pt",
"https://www.petoutlet.pt",
"https://europlantas.pt",
"https://www.pinterest.pt",
"https://www.ipma.pt",
"https://www.google.pt",
"https://www.teleculinaria.pt",
"https://www.vaqueiro.pt",
"https://www.pingodoce.pt",
"https://lifestyle.sapo.pt",
"https://feed.continente.pt",
"https://saboreiaavida.nestle.pt",
"https://www.ivdp.pt",
"https://www.revistadevinhos.pt",
"https://www.vinhosdoalentejo.pt",
"https://www.drinksco.pt",
"https://www.cascawines.pt",
"https://www.clubevinhosportugueses.pt",
"https://www.onwine.pt",
"https://www.vinha.pt",
"https://www.garrafeirasoares.pt",
"https://wine.pt",
"https://estadoliquido.pt",
"https://receitasangola.blogspot.pt",
"https://www.apn.org.pt",
"https://www.thefork.pt",
"https://nutrimento.pt",
"https://www.arodadaalimentacao.pt",
"https://www.nit.pt",
"https://www.viversaudavel.pt",
"https://media.rtp.pt",
"https://sic.pt",
"https://www.daamaoafloresta.pt",
"https://www.radiomiudos.pt",
"https://bebes.kazulo.pt",
"https://canalpanda.pt",
"https://tv.disney.pt",
"https://www.nickelodeon.pt",
"https://estrelaseouricos.sapo.pt",
"https://www.cartoonnetwork.pt",
"https://www.coronakids.pt",
"https://poki.pt",
"https://www.1001jogos.pt",
"https://www.ajudadeberco.pt",
"https://www.cnpdpcj.gov.pt",
"https://www.apfn.com.pt",
"https://bledina.pt",
"https://nutriben.pt",
"https://empresa.nestle.pt",
"https://www.imaginarium.pt",
"https://www.chicco.pt",
"https://www.toysrus.pt",
"https://www.disneylandparis.pt",
"https://www.oceanario.pt",
"https://www.kidzania.pt",
"https://lisboastorycentre.pt",
"https://www.zoomarine.pt",
"https://arquitectos.pt",
"https://www.lnec.pt",
"https://www.ordemengenheiros.pt",
"https://www.oet.pt",
"https://www.plus500.pt",
"https://www.cmvm.pt",
"https://www.investirnabolsa.pt",
"https://www.deco.proteste.pt",
"https://www.jornaldenegocios.pt",
"https://www.rankia.pt",
"https://observador.pt",
"https://www.rbe.mec.pt",
"https://cvc.instituto-camoes.pt",
"https://bndigital.bnportugal.gov.pt",
"https://www.b-on.pt",
"https://www.priberam.pt",
"https://www.infopedia.pt",
"https://www.dgeste.mec.pt",
"https://www.escolavirtual.pt",
"https://ensina.rtp.pt",
"https://www.seguranet.pt",
"https://www.universia.pt",
"https://www.forum.pt",
"https://ciberduvidas.iscte-iul.pt",
"https://orientacao-vocacional.com.pt",
"https://www.educare.pt",
"https://www.dge.mec.pt",
"https://www.dges.gov.pt",
"https://www.igec.mec.pt",
"https://portaldasmatriculas.edu.gov.pt",
"https://www.dgeec.mec.pt",
"https://www.pnl2027.gov.pt",
"https://www.gee.gov.pt",
"https://www.portoeditora.pt",
"https://iave.pt",
"https://www.fenprof.pt",
"https://fne.pt",
"https://www.sepleu.pt",
"https://sindep.pt",
"https://www.sipe.pt",
"https://www.spliu.pt",
"https://www.aspl.webhs.pt",
"https://www.spzn.pt",
"https://www.spzs.pt",
"https://www.spzc.pt",
"https://www.sdpa.pt",
"https://anprofessores.pt",
"https://aspl.webhs.pt",
"https://apei.pt",
"https://manuaisescolares.pt",
"https://dislexia.pt",
"https://hiperatividade.com.pt",
"https://www.portalbullying.com.pt",
"https://www.timeout.pt",
"https://www.viagogo.pt",
"https://www.agendalx.pt",
"https://www.guiadacidade.pt",
"https://viva-porto.pt",
"https://www.cnc.pt",
"https://www.culturgest.pt",
"https://www.e-cultura.pt",
"https://eventosemportugal.pt",
"https://www.tripadvisor.pt",
"https://www.ccb.pt",
"https://gulbenkian.pt",
"https://www.serralves.pt",
"https://arena.altice.pt",
"https://www.coliseu.pt",
"https://cae.pt",
"https://www.bol.pt",
"https://bilheteira.fnac.pt",
"https://ticketline.sapo.pt",
"https://www.acad-ciencias.pt",
"https://www.fct.pt",
"https://www.pavconhecimento.pt",
"https://www.cienciaviva.pt",
"https://oal.ul.pt",
"https://www.rcaap.pt",
"https://scholar.google.pt",
"https://www.museus.ulisboa.pt",
"https://www.abarth.pt",
"https://www.alfaromeo.pt",
"https://www.audi.pt",
"https://www.chevrolet.pt",
"https://www.bmw.pt",
"https://www.citroen.pt",
"https://www.cupraofficial.pt",
"https://www.dacia.pt",
"https://www.fiat.pt",
"https://www.honda.pt",
"https://www.ford.pt",
"https://www.hyundai.pt",
"https://kia.pt",
"https://www.lexus.pt",
"https://www.mazda.pt",
"https://www.mercedes-benz.pt",
"https://www.mini.pt",
"https://www.mitsubishi-motors.pt",
"https://www.opel.pt",
"https://www.nissan.pt",
"https://www.peugeot.pt",
"https://www.renault.pt",
"https://www.seat.pt",
"https://www.skoda-auto.pt",
"https://www.suzuki.pt",
"https://www.toyota.pt",
"https://www.volkswagen.pt",
"https://autoportal.iol.pt",
"https://www.turbo.pt",
"https://www.tuning.online.pt",
"https://www.velocidades.pt",
"https://www.circuito-estoril.pt",
"https://volantesic.pt",
"https://www.avis.com.pt",
"https://www.budget.com.pt",
"https://www.turiscar.pt",
"https://www.autoeurope.pt",
"https://www.skyscanner.pt",
"https://auto.sapo.pt",
"https://www.autousados.pt",
"https://www.olx.pt",
"https://www.piscapisca.pt",
"https://www.autocompraevenda.pt",
"https://www.autouncle.pt",
"https://www.precoscombustiveis.dgeg.pt",
"https://servicos.infraestruturasdeportugal.pt",
"https://www.viaverde.pt",
"https://www.brisa.pt",
"https://www.acp.pt",
"https://www.eparkempresas.pt",
"https://cinemas.nos.pt",
"https://www.ucicinemas.pt",
"https://melhores-sites.pt",
"https://pt.trustpilot.com",
"https://ccm.marinha.pt",
"https://www.natgeo.pt",
"https://www.zoo.pt",
"https://arcadenoe.pt",
"https://revistajardins.pt",
"https://www.meganimal.pt",
"https://www.miscota.pt",
"https://www.zooplus.pt",
"https://www.tiendanimal.pt",
"https://www.kiwoko.pt",
"https://www.petoutlet.pt",
"https://europlantas.pt",
"https://www.pinterest.pt",
"https://www.ipma.pt",
"https://www.google.pt",
"https://www.teleculinaria.pt",
"https://www.vaqueiro.pt",
"https://www.pingodoce.pt",
"https://lifestyle.sapo.pt",
"https://feed.continente.pt",
"https://saboreiaavida.nestle.pt",
"https://www.ivdp.pt",
"https://www.revistadevinhos.pt",
"https://www.vinhosdoalentejo.pt",
"https://www.drinksco.pt",
"https://www.cascawines.pt",
"https://www.clubevinhosportugueses.pt",
"https://www.onwine.pt",
"https://www.vinha.pt",
"https://www.garrafeirasoares.pt",
"https://wine.pt",
"https://estadoliquido.pt",
"https://receitasangola.blogspot.pt",
"https://www.apn.org.pt",
"https://www.thefork.pt",
"https://nutrimento.pt",
"https://www.arodadaalimentacao.pt",
"https://www.nit.pt",
"https://www.viversaudavel.pt",
"https://media.rtp.pt",
"https://sic.pt",
"https://www.daamaoafloresta.pt",
"https://www.radiomiudos.pt",
"https://bebes.kazulo.pt",
"https://canalpanda.pt",
"https://tv.disney.pt",
"https://www.nickelodeon.pt",
"https://estrelaseouricos.sapo.pt",
"https://www.cartoonnetwork.pt",
"https://www.coronakids.pt",
"https://poki.pt",
"https://www.1001jogos.pt",
"https://www.ajudadeberco.pt",
"https://www.cnpdpcj.gov.pt",
"https://www.apfn.com.pt",
"https://bledina.pt",
"https://nutriben.pt",
"https://empresa.nestle.pt",
"https://www.imaginarium.pt",
"https://www.chicco.pt",
"https://www.toysrus.pt",
"https://www.disneylandparis.pt",
"https://www.oceanario.pt",
"https://www.kidzania.pt",
"https://lisboastorycentre.pt",
"https://www.zoomarine.pt",
"https://arquitectos.pt",
"https://www.lnec.pt",
"https://www.ordemengenheiros.pt",
"https://www.oet.pt",
"https://www.plus500.pt",
"https://www.cmvm.pt",
"https://www.investirnabolsa.pt",
"https://www.deco.proteste.pt",
"https://www.jornaldenegocios.pt",
"https://www.rankia.pt",
"https://observador.pt",
"https://www.rbe.mec.pt",
"https://cvc.instituto-camoes.pt",
"https://bndigital.bnportugal.gov.pt",
"https://www.b-on.pt",
"https://www.priberam.pt",
"https://www.infopedia.pt",
"https://www.dgeste.mec.pt",
"https://www.escolavirtual.pt",
"https://ensina.rtp.pt",
"https://www.seguranet.pt",
"https://www.universia.pt",
"https://www.forum.pt",
"https://ciberduvidas.iscte-iul.pt",
"https://orientacao-vocacional.com.pt",
"https://www.educare.pt",
"https://www.dge.mec.pt",
"https://www.dges.gov.pt",
"https://www.igec.mec.pt",
"https://portaldasmatriculas.edu.gov.pt",
"https://www.dgeec.mec.pt",
"https://www.pnl2027.gov.pt",
"https://www.gee.gov.pt",
"https://www.portoeditora.pt",
"https://iave.pt",
"https://www.fenprof.pt",
"https://fne.pt",
"https://www.sepleu.pt",
"https://sindep.pt",
"https://www.sipe.pt",
"https://www.spliu.pt",
"https://www.aspl.webhs.pt",
"https://www.spzn.pt",
"https://www.spzs.pt",
"https://www.spzc.pt",
"https://www.sdpa.pt",
"https://anprofessores.pt",
"https://aspl.webhs.pt",
"https://apei.pt",
"https://manuaisescolares.pt",
"https://dislexia.pt",
"https://hiperatividade.com.pt",
"https://www.portalbullying.com.pt",
"https://www.timeout.pt",
"https://www.viagogo.pt",
"https://www.agendalx.pt",
"https://www.guiadacidade.pt",
"https://viva-porto.pt",
"https://www.cnc.pt",
"https://www.culturgest.pt",
"https://www.e-cultura.pt",
"https://eventosemportugal.pt",
"https://www.tripadvisor.pt",
"https://www.ccb.pt",
"https://gulbenkian.pt",
"https://www.serralves.pt",
"https://arena.altice.pt",
"https://www.coliseu.pt",
"https://cae.pt",
"https://www.bol.pt",
"https://bilheteira.fnac.pt",
"https://ticketline.sapo.pt",
"https://www.acad-ciencias.pt",
"https://www.fct.pt",
"https://www.pavconhecimento.pt",
"https://www.cienciaviva.pt",
"https://oal.ul.pt",
"https://www.rcaap.pt",
"https://scholar.google.pt",
"https://www.museus.ulisboa.pt",
"https://www.abarth.pt",
"https://www.alfaromeo.pt",
"https://www.audi.pt",
"https://www.chevrolet.pt",
"https://www.bmw.pt",
"https://www.citroen.pt",
"https://www.cupraofficial.pt",
"https://www.dacia.pt",
"https://www.fiat.pt",
"https://www.honda.pt",
"https://www.ford.pt",
"https://www.hyundai.pt",
"https://kia.pt",
"https://www.lexus.pt",
"https://www.mazda.pt",
"https://www.mercedes-benz.pt",
"https://www.mini.pt",
"https://www.mitsubishi-motors.pt",
"https://www.opel.pt",
"https://www.nissan.pt",
"https://www.peugeot.pt",
"https://www.renault.pt",
"https://www.seat.pt",
"https://www.skoda-auto.pt",
"https://www.suzuki.pt",
"https://www.toyota.pt",
"https://www.volkswagen.pt",
"https://autoportal.iol.pt",
"https://www.turbo.pt",
"https://www.tuning.online.pt",
"https://www.velocidades.pt",
"https://www.circuito-estoril.pt",
"https://volantesic.pt",
"https://www.avis.com.pt",
"https://www.budget.com.pt",
"https://www.turiscar.pt",
"https://www.autoeurope.pt",
"https://www.skyscanner.pt",
"https://auto.sapo.pt",
"https://www.autousados.pt",
"https://www.olx.pt",
"https://www.piscapisca.pt",
"https://www.autocompraevenda.pt",
"https://www.autouncle.pt",
"https://www.precoscombustiveis.dgeg.pt",
"https://servicos.infraestruturasdeportugal.pt",
"https://www.viaverde.pt",
"https://www.brisa.pt",
"https://www.acp.pt",
"https://www.eparkempresas.pt",
"https://cinemas.nos.pt",
"https://www.ucicinemas.pt",
};
/* Header list handed to every easy handle through CURLOPT_HTTPHEADER;
 * populated by create_linked_list_headers(). */
struct curl_slist *headers = NULL;
/* Running total of payload bytes, incremented by writeCallback(). */
size_t bytes_downloaded = 0;
/* Transfer counter; not referenced by any code visible in this file. */
int transfers = 0;
//-------------------------------------
// Function Declarations
//-------------------------------------
/*
 * Print the interval between two clock() samples and a derived
 * responses-per-second figure.
 *
 * start, end: values obtained from clock(); the difference is converted to
 *             seconds with CLOCKS_PER_SEC.
 *
 * When the interval is zero or negative no rate is printed, because the
 * division would produce an infinite/negative value whose conversion to int
 * is undefined.
 *
 * NOTE(review): the rate is computed from a fixed count of 100 responses,
 * which is not tied to the number of transfers actually started - confirm
 * the intended count.
 */
void print_time_duration(clock_t start, clock_t end)
{
    const double responses_counted = 100.0;
    double cpu_time_used = (double)(end - start) / CLOCKS_PER_SEC;

    printf(" [ LUIS ] - The execution duration was %f seconds \n", cpu_time_used);

    if (cpu_time_used <= 0.0) {
        printf(" [ LUIS ] - Duration too short to compute responses per second\n");
        return;
    }

    /* Truncate the quotient, not the numerator. */
    int responses_p_sec = (int)(responses_counted / cpu_time_used);
    printf(" [ LUIS ] - %d responses per second\n", responses_p_sec);
}
/*
 * Fill the global `headers` list with every entry of HEADERS[].
 *
 * Returns 0 when all NUM_HEADERS entries were appended, -1 when an append
 * fails. On failure the partially built list is released and `headers` is
 * reset to NULL.
 */
int create_linked_list_headers(void)
{
    for (int i = 0; i < NUM_HEADERS; i++) {
        /* Keep the old head until the append is known to have succeeded;
         * assigning the result directly would lose the list on failure. */
        struct curl_slist *grown = curl_slist_append(headers, HEADERS[i]);
        if (grown == NULL) {
            curl_slist_free_all(headers);
            headers = NULL;
            fprintf(stderr,
                    " [ LUIS ] - Error filling the list of headers with header %s\n", HEADERS[i]);
            return -1;
        }
        headers = grown;
    }
    return 0;
}
/*
 * Join the first `cores` threads stored in `tid`, in index order.
 *
 * tid:   array of thread identifiers to join.
 * cores: number of entries of `tid` to process.
 *
 * A line is written to stderr for each thread: either that it terminated or,
 * when pthread_join() reports an error, the reason the join failed.
 */
void wait_for_threads(pthread_t *tid, int cores)
{
    for (int i = 0; i < cores; i++) {
        int rc = pthread_join(tid[i], NULL);
        if (rc != 0) {
            /* pthread functions return the error number directly. */
            fprintf(stderr, " [ LUIS ] - Could not join thread %d: %s\n",
                    i, strerror(rc));
            continue;
        }
        fprintf(stderr, " [ LUIS ] - Thread %d terminated\n", i);
    }
}
/*
 * Drive every transfer attached to `multi_handle` until none is running.
 *
 * multi_handle: multi handle with the easy handles already added.
 * file:         stream for per-transfer log lines; stderr is used when NULL.
 *
 * Each completed transfer is logged with its effective URL and result, then
 * detached from the multi handle. The easy handle itself is NOT cleaned up
 * here: the code that created the handles keeps ownership and releases them,
 * so no handle is released twice.
 *
 * Returns 0 when all transfers have finished, -1 when curl_multi_perform()
 * or curl_multi_poll() reports an error.
 */
int transfer_control_loop(CURLM *multi_handle,FILE * file)
{
    FILE *log = (file != NULL) ? file : stderr;
    int still_running = 1;

    while (still_running) {
        CURLMcode mc = curl_multi_perform(multi_handle, &still_running);
        if (mc != CURLM_OK) {
            fprintf(log, " [ LUIS ] - curl_multi_perform failed: %s\n",
                    curl_multi_strerror(mc));
            return -1;
        }

        /* Drain the whole message queue: several transfers can complete
         * during a single curl_multi_perform() call. */
        int msgs_left = 0;
        CURLMsg *m;
        while ((m = curl_multi_info_read(multi_handle, &msgs_left)) != NULL) {
            if (m->msg != CURLMSG_DONE)
                continue;

            /* Copy what is needed before removing the handle; the message
             * is not valid after curl_multi_remove_handle(). */
            CURL *e = m->easy_handle;
            CURLcode result = m->data.result;
            char *url = NULL;

            curl_easy_getinfo(e, CURLINFO_EFFECTIVE_URL, &url);
            fprintf(log, " [ LUIS ] - Transfer for URL %s finished: %s\n",
                    url ? url : "(unknown)", curl_easy_strerror(result));

            curl_multi_remove_handle(multi_handle, e);
        }

        if (still_running) {
            /* Wait up to one second for activity on any transfer. */
            mc = curl_multi_poll(multi_handle, NULL, 0, 1000, NULL);
            if (mc != CURLM_OK) {
                fprintf(log, " [ LUIS ] - curl_multi_poll failed: %s\n",
                        curl_multi_strerror(mc));
                return -1;
            }
        }
    }
    return 0;
}
/*
 * Write callback with the CURLOPT_WRITEFUNCTION signature.
 *
 * ptr:      received data; it is NOT NUL-terminated, so it is written with
 *           an explicit length rather than printed as a C string.
 * size:     element size.
 * nmemb:    element count; the chunk length is size * nmemb.
 * userdata: unused.
 *
 * Adds the chunk length to the global bytes_downloaded, logs the counter to
 * stderr and copies the chunk to stdout. Returns the chunk length.
 *
 * NOTE(review): the logged figure is the running total divided by 4 although
 * the label says bytes - confirm the intent.
 * NOTE(review): bytes_downloaded is a plain global; if this callback runs on
 * several threads the updates are unsynchronised - verify against callers.
 */
static size_t
writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata)
{
    (void)userdata;
    size_t chunk = size * nmemb;

    bytes_downloaded += chunk;
    fprintf(stderr, " [ LUIS ] - Bytes downloaded: %zu\n",
            bytes_downloaded / 4);
    fwrite(ptr, 1, chunk, stdout);
    return chunk;
}
static void * multi_crawl_thread(void *arg)
{
// Declarar o vector de handles e a handle multi
CURL *handles[NUM_SEEDS];
CURLM *multi_handle;
int i;
// Print thead ID
pthread_t id = pthread_self();
char id_string[128];
sprintf(id_string,"%lu",id);
char *file_name = strncat(id_string,"_thread_log.txt",16);
// Delete older files
// Create a file for logs
FILE * file = fopen(file_name,"w");
if(file == NULL){
printf(" [LUIS] - Error opening file %s",file_name);
}
// Create handles and set options
for(i = 0; i< NUM_SEEDS; i++){
handles[i] = curl_easy_init();
if(handles[i]){
curl_easy_setopt(handles[i], CURLOPT_URL, seeds[i]);
curl_easy_setopt(handles[i], CURLOPT_STDERR, file);
curl_easy_setopt(handles[i], CURLOPT_VERBOSE, 1L);
curl_easy_setopt(handles[i], CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(handles[i], CURLOPT_DNS_SERVERS, "8.8.8.8");
// Adicionados para teste
curl_easy_setopt(handles[i], CURLOPT_DNS_CACHE_TIMEOUT,0L );
fprintf(file," [ LUIS ] - Created easy handle %d na thread %lu\n", i, id);
}
}
printf(" [ LUIS ] - Thread %lu set up %d transfers\n",id,i);
// Criar a multi_handle
multi_handle = curl_multi_init();
// Adicionar handles à stack multi
for(i = 0; i < NUM_SEEDS; i++){
curl_multi_add_handle(multi_handle, handles[i]);
}
// Transfer controll loop
transfer_control_loop(multi_handle, file);
printf(" [ LUIS ] - Depois do do transfer control loop\n");
// Cleanup after execution
// esta foma de cleanup não está correcta - Leva a core dump
for(i = 0; i < NUM_SEEDS; i++){
printf(" [ LUIS ] - Dentro do loop cleanup after execution\n");
curl_multi_remove_handle(multi_handle, handles[i]);
printf(" [ LUIS ] - Dentro loop cleanup after execution - Depois de remove handle do multi\n");
curl_easy_cleanup(handles[i]);
printf(" [ LUIS ] - Dentro loop cleanup after execution - Depois do easycleanup\n");
}
curl_multi_cleanup(multi_handle);
printf(" [ LUIS ] - Depois do curl_multi_cleanup\n");
return NULL;
}
/*
 * Start `cores` threads running multi_crawl_thread and store their
 * identifiers in tid[0..cores-1].
 *
 * tid:   output array with room for at least `cores` identifiers.
 * cores: number of threads to start.
 *
 * The result of every pthread_create() call is logged to stderr; a failure
 * is reported but does not stop the remaining threads from being started.
 *
 * NOTE(review): when pthread_create() fails, the matching tid slot is left
 * as the caller provided it - verify that callers do not join such a slot.
 */
void create_threads(pthread_t *tid, int cores)
{
    int n = 0;
    while (n < cores) {
        printf(" [ LUIS ] - dentro do loop q cria thereads\n");

        int rc = pthread_create(tid + n, NULL, multi_crawl_thread, NULL);

        fprintf(stderr," [ LUIS ] - Depois de criado a %d thread com handle: %d e erro:\n",n,rc);
        if (rc != 0)
            fprintf(stderr, " [ LUIS ] - Couldn't run thread number %d, errno %d\n",n,rc);

        n++;
    }
}
int main()
{
int cores = get_nprocs();
printf(" [ LUIS ] - %d working cores on this machine!\n",cores);
// Initializing thread-unsafe curl third-party libraries - Before anything
curl_global_init(CURL_GLOBAL_ALL);
// Create global liked list of headers
create_linked_list_headers();
// Criar as threads
pthread_t tid[cores];
create_threads(tid, cores);
clock_t start = clock();
// Wait for all threads to terminate */
wait_for_threads(tid,cores);
clock_t end = clock();
print_time_duration(start, end);
curl_global_cleanup();
return 0;
}
--
Unsubscribe: https://lists.haxx.se/mailman/listinfo/curl-library
Etiquette: https://curl.se/mail/etiquette.html