On 14/5/20 8:15 am, Anatol Pomozov wrote: > Until now callee of ALPM download functionality has been in charge of > payload creation both for the main file (e.g. *.pkg) and for the accompanied > *.sig file. One advantage of such solution is that all payloads are > independent and can be fetched in parallel thus exploiting the maximum > level of download parallelism. > > To build *.sig file url we've been using a simple string concatenation: > $requested_url + ".sig". Unfortunately there are cases when it does not > work. For example an archlinux.org "Download From Mirror" link looks like > this https://www.archlinux.org/packages/core/x86_64/bash/download/ and > it gets redirected to some mirror. But if we append ".sig" to the end of > the link url and try to download it then archlinux.org returns 404 error. > > To overcome this issue we need to follow redirects for the main payload > first, find the final url and only then append '.sig' suffix. > This implies 2 things: > - the signature payload initialization need to be moved to dload.c > as it is the place where we have access to the resolved url > - *.sig is downloaded serially with the main payload and this reduces > level of parallelism > > Move *.sig payload creation to dload.c. Once the main payload is fetched > successfully we check if the callee asked to download the accompanied > signature. If yes - create a new payload and add it to mcurl. > > *.sig payload does not use server list of the main payload and thus does > not support mirror failover. *.sig file comes from the same server as > the main payload. > > Refactor event loop in curl_multi_download_internal() a bit. Instead of > relying on curl_multi_check_finished_download() to return number of new > payloads we simply rerun the loop iteration one more time to check if > there are any active downloads left. > --- > lib/libalpm/be_sync.c | 34 +++------------- > lib/libalpm/dload.c | 91 ++++++++++++++++++++++++++----------------- > lib/libalpm/dload.h | 4 +- > 3 files changed, 65 insertions(+), 64 deletions(-) > > diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c > index 82018e15..41675d21 100644 > --- a/lib/libalpm/be_sync.c > +++ b/lib/libalpm/be_sync.c > @@ -180,12 +180,10 @@ int SYMEXPORT alpm_db_update(alpm_handle_t *handle, > alpm_list_t *dbs, int force) > dbforce = 1; > } > > - CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, > ALPM_ERR_MEMORY, cleanup)); > + siglevel = alpm_db_get_siglevel(db); > > - /* set hard upper limit of 128 MiB */ > - payload->max_size = 128 * 1024 * 1024; > + CALLOC(payload, 1, sizeof(*payload), GOTO_ERR(handle, > ALPM_ERR_MEMORY, cleanup)); > payload->servers = db->servers; > - > /* print server + filename into a buffer */ > len = strlen(db->treename) + strlen(dbext) + 1; > MALLOC(payload->filepath, len, > @@ -194,31 +192,11 @@ int SYMEXPORT alpm_db_update(alpm_handle_t *handle, > alpm_list_t *dbs, int force) > payload->handle = handle; > payload->force = dbforce; > payload->unlink_on_fail = 1; > - > + payload->download_signature = (siglevel & ALPM_SIG_DATABASE); > + payload->signature_optional = (siglevel & > ALPM_SIG_DATABASE_OPTIONAL); > + /* set hard upper limit of 128 MiB */ > + payload->max_size = 128 * 1024 * 1024; > payloads = alpm_list_add(payloads, payload);
OK. > - > - siglevel = alpm_db_get_siglevel(db); > - if(siglevel & ALPM_SIG_DATABASE) { > - struct dload_payload *sig_payload; > - CALLOC(sig_payload, 1, sizeof(*sig_payload), > GOTO_ERR(handle, ALPM_ERR_MEMORY, cleanup)); > - sig_payload->signature = 1; > - > - /* print filename into a buffer (leave space for > separator and .sig) */ > - len = strlen(db->treename) + strlen(dbext) + 5; > - MALLOC(sig_payload->filepath, len, > - FREE(sig_payload); GOTO_ERR(handle, > ALPM_ERR_MEMORY, cleanup)); > - snprintf(sig_payload->filepath, len, "%s%s.sig", > db->treename, dbext); > - > - sig_payload->handle = handle; > - sig_payload->force = dbforce; > - sig_payload->errors_ok = (siglevel & > ALPM_SIG_DATABASE_OPTIONAL); > - > - /* set hard upper limit of 16 KiB */ > - sig_payload->max_size = 16 * 1024; > - sig_payload->servers = db->servers; > - > - payloads = alpm_list_add(payloads, sig_payload); > - } > } > > event.type = ALPM_EVENT_DB_RETRIEVE_START; OK. > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c > index 4dbb011f..b68dcf6d 100644 > --- a/lib/libalpm/dload.c > +++ b/lib/libalpm/dload.c > @@ -18,6 +18,7 @@ > * along with this program. If not, see <http://www.gnu.org/licenses/>. > */ > > +#include <stdbool.h> See below. > #include <stdlib.h> > #include <stdio.h> > #include <errno.h> > @@ -49,6 +50,10 @@ > #include "handle.h" > > #ifdef HAVE_LIBCURL > + > +static int curl_multi_add_payload(alpm_handle_t *handle, CURLM *curlm, > + struct dload_payload *payload, const char *localpath); > + OK. > static const char *get_filename(const char *url) > { > char *filename = strrchr(url, '/'); > @@ -476,6 +481,25 @@ static int curl_multi_check_finished_download(CURLM > *curlm, CURLMsg *msg, > curl_easy_getinfo(curl, CURLINFO_CONDITION_UNMET, &timecond); > curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &effective_url); > > + /* Let's check if client requested downloading accompanion *.sig file */ > + if(!payload->signature && payload->download_signature && curlerr == > CURLE_OK && payload->respcode < 400) { > + struct dload_payload *sig = NULL; > + > + int len = strlen(effective_url) + 5; > + CALLOC(sig, 1, sizeof(*sig), GOTO_ERR(handle, ALPM_ERR_MEMORY, > cleanup)); > + MALLOC(sig->fileurl, len, FREE(sig); GOTO_ERR(handle, > ALPM_ERR_MEMORY, cleanup)); > + snprintf(sig->fileurl, len, "%s.sig", effective_url); > + sig->signature = 1; > + sig->handle = handle; > + sig->force = payload->force; > + sig->unlink_on_fail = payload->unlink_on_fail; > + sig->errors_ok = payload->signature_optional; > + /* set hard upper limit of 16KiB */ > + sig->max_size = 16 * 1024; > + > + curl_multi_add_payload(handle, curlm, sig, localpath); > + } > + OK. > /* time condition was met and we didn't download anything. we need to > * clean up the 0 byte .part file that's left behind. */ > if(timecond == 1 && DOUBLE_EQ(bytes_dl, 0)) { > @@ -565,6 +589,13 @@ cleanup: > if(ret == -1 && payload->errors_ok) { > ret = -2; > } > + > + if(payload->signature) { > + /* free signature payload memory that was allocated earlier in > dload.c */ > + _alpm_dload_payload_reset(payload); > + FREE(payload); > + } > + OK. > return ret; > } > > @@ -669,12 +700,12 @@ static int curl_multi_download_internal(alpm_handle_t > *handle, > int still_running = 0; > int err = 0; > int parallel_downloads = handle->parallel_downloads; > - > CURLM *curlm = handle->curlm; > - CURLMsg *msg; > + bool msg_done = false; > This is a weird mix of bool and int... still_running is 0/1. msg_done is true/false.f. I know stdbool started to be used in pacman with this patch series, but I think it should be removed from here and a more comprehensive made later on so not to have a mix. > - while(still_running || payloads) { > - int msgs_left = -1; > + while(still_running || payloads || msg_done) { > + int msgs_left = 0; > + CURLMcode mc; > > for(; still_running < parallel_downloads && payloads; > still_running++) { > struct dload_payload *payload = payloads->data; > @@ -687,15 +718,19 @@ static int curl_multi_download_internal(alpm_handle_t > *handle, > > payloads = payloads->next; > } else { > - // the payload failed to start, do not start > any new downloads just wait until > - // active one complete. > + /* The payload failed to start. Do not start > any new downloads. > + * Wait until all active downloads complete. > + */ Not directly related to this patch, but OK... > _alpm_log(handle, ALPM_LOG_ERROR, _("failed to > setup a download payload for %s\n"), payload->remote_name); > payloads = NULL; > err = -1; > } > } > > - CURLMcode mc = curl_multi_perform(curlm, &still_running); > + mc = curl_multi_perform(curlm, &still_running); > + if(mc == CURLM_OK) { > + mc = curl_multi_wait(curlm, NULL, 0, 1000, NULL); > + } OK. > > if(mc != CURLM_OK) { > _alpm_log(handle, ALPM_LOG_ERROR, _("curl returned > error %d from transfer\n"), mc); > @@ -703,28 +738,28 @@ static int curl_multi_download_internal(alpm_handle_t > *handle, > err = -1; > } > > - while((msg = curl_multi_info_read(curlm, &msgs_left))) { > + msg_done = false; > + while(true) { > + CURLMsg *msg = curl_multi_info_read(curlm, &msgs_left); > + if(!msg) { > + break; > + } OK. > if(msg->msg == CURLMSG_DONE) { > int ret = > curl_multi_check_finished_download(curlm, msg, localpath); > if(ret == -1) { > - /* if current payload failed to > download then stop adding new payloads but wait for the > - * current ones > - */ > + /* if current payload failed to > download then stop adding new payloads */ Fine. > payloads = NULL; > err = -1; > - } else if(ret == 2) { > - /* in case of a retry increase the > counter of active requests > - * to avoid exiting the loop early > - */ > - still_running++; > } Where is this going? > + /* curl_multi_check_finished_download() might > add more payloads e.g. in case of a retry > + * from the next mirror. We need to execute > curl_multi_perform() at least one more time > + * to make sure new payload requests are > processed. > + */ > + msg_done = true; This is a weird variable name now I have figured out what it does. My understanding is that this is really a flag to actually check that everything is done. I know it is related to the current CURLMsg being complete. Can it be renamed something like "recheck_downloads"? > } else { > _alpm_log(handle, ALPM_LOG_ERROR, _("curl > transfer error: %d\n"), msg->msg); > } > } > - if(still_running) { > - curl_multi_wait(curlm, NULL, 0, 1000, NULL); > - } > } > > _alpm_log(handle, ALPM_LOG_DEBUG, "curl_multi_download_internal return > code is %d\n", err); > @@ -803,12 +838,10 @@ int SYMEXPORT alpm_fetch_pkgurl(alpm_handle_t *handle, > const alpm_list_t *urls, > { > const char *cachedir; > alpm_list_t *payloads = NULL; > - int download_sigs; > const alpm_list_t *i; > alpm_event_t event; > > CHECK_HANDLE(handle, return -1); > - download_sigs = handle->siglevel & ALPM_SIG_PACKAGE; > > /* find a valid cache dir to download to */ > cachedir = _alpm_filecache_setup(handle); > @@ -830,21 +863,9 @@ int SYMEXPORT alpm_fetch_pkgurl(alpm_handle_t *handle, > const alpm_list_t *urls, > payload->allow_resume = 1; > payload->handle = handle; > payload->trust_remote_name = 1; > + payload->download_signature = (handle->siglevel & > ALPM_SIG_PACKAGE); > + payload->signature_optional = (handle->siglevel & > ALPM_SIG_PACKAGE_OPTIONAL); > payloads = alpm_list_add(payloads, payload); > - > - if(download_sigs) { > - int len = strlen(url) + 5; > - CALLOC(payload, 1, sizeof(*payload), > GOTO_ERR(handle, ALPM_ERR_MEMORY, err)); > - payload->signature = 1; > - MALLOC(payload->fileurl, len, FREE(payload); > GOTO_ERR(handle, ALPM_ERR_MEMORY, err)); > - snprintf(payload->fileurl, len, "%s.sig", url); > - payload->handle = handle; > - payload->trust_remote_name = 1; > - payload->errors_ok = (handle->siglevel & > ALPM_SIG_PACKAGE_OPTIONAL); > - /* set hard upper limit of 16KiB */ > - payload->max_size = 16 * 1024; > - payloads = alpm_list_add(payloads, payload); > - } OK. > } > } > > diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h > index d13bc1b5..facafca2 100644 > --- a/lib/libalpm/dload.h > +++ b/lib/libalpm/dload.h > @@ -47,11 +47,13 @@ struct dload_payload { > int errors_ok; > int unlink_on_fail; > int trust_remote_name; > - int signature; /* specifies if the payload is a signature file */ > + int download_signature; /* specifies if an accompanion *.sig file need > to be downloaded*/ > + int signature_optional; /* *.sig file is optional */ > #ifdef HAVE_LIBCURL > CURL *curl; > char error_buffer[CURL_ERROR_SIZE]; > FILE *localf; /* temp download file */ > + int signature; /* specifies if this payload is for a signature file */ > #endif > }; > >