Thank you for your comments and implementation! I am looking forward to the acceleration of nginx startup with this patch.
2023年10月11日(水) 7:56 Maxim Dounin <[email protected]>: > > Hello! > > On Thu, Oct 05, 2023 at 10:51:26AM +0900, Yusuke Nojima wrote: > > > Thank you for your comment! > > > > > Could you please provide some more details about the use case, > > > such as how locations are used, and what is the approximate number > > > of locations being used? > > > > Our team provides development environments to our company's engineers and > > QA. > > In this environment, engineers and QA can freely create VMs and deploy > > applications on them. > > > > Our nginx has the role of routing requests from the internet to all > > applications deployed in this environment. > > Additionally, it allows setting IP address restrictions, BASIC > > authentication, TLS client authentication, and other configurations > > for each application. > > > > To implement these requirements, we generate a location for each > > application. > > Currently, there are approximately 40,000 locations in our environment. > > Thank you for the details. Such configuration looks somewhat > sub-optimal, but understandable for a development / test > environment. And certainly 40k locations is a lot for the sorting > algorithm currently used. > > > > Further, since each location contains configuration for > > > all modules, such configurations are expected to require a lot of > > > memory > > > > Each of our nginx processes was consuming 5GB of memory in terms of > > resident size. > > This is not a problem as our servers have sufficient memory. > > > > > Rather, I would suggest recursive top-bottom merge sort implementation > > > instead, which is much simpler and uses stack as temporary storage > > > (so it'll naturally die if there will be a queue which requires > > > more space for sorting than we have). > > > > > > Please take a look if it works for you: > > > > I think this implementation is simple and easy to understand. > > Although the number of traversals of the list will increase compared > > to bottom-up, it will not affect the order. > > I believe this will provide sufficient optimization in terms of speed. > > Thanks for looking. In my limited testing, it is slightly faster > than your bottom-up implementation (and significantly faster than > the existing insertion sort when many locations are used). > > Below is the full patch (code unchanged), I'll commit it as soon > as some other nginx developer will review it. > > # HG changeset patch > # User Maxim Dounin <[email protected]> > # Date 1696977468 -10800 > # Wed Oct 11 01:37:48 2023 +0300 > # Node ID b891840852ee5cc823eee1769d092ab50928919f > # Parent cdda286c0f1b4b10f30d4eb6a63fefb9b8708ecc > Core: changed ngx_queue_sort() to use merge sort. > > This improves nginx startup times significantly when using very large number > of locations due computational complexity of the sorting algorithm being > used (insertion sort is O(n*n) on average, while merge sort is O(n*log(n))). > In particular, in a test configuration with 20k locations total startup > time is reduced from 8 seconds to 0.9 seconds. > > Prodded by Yusuke Nojima, > https://mailman.nginx.org/pipermail/nginx-devel/2023-September/NUL3Y2FPPFSHMPTFTL65KXSXNTX3NQMK.html > > diff --git a/src/core/ngx_queue.c b/src/core/ngx_queue.c > --- a/src/core/ngx_queue.c > +++ b/src/core/ngx_queue.c > @@ -9,6 +9,10 @@ > #include <ngx_core.h> > > > +static void ngx_queue_merge(ngx_queue_t *queue, ngx_queue_t *tail, > + ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)); > + > + > /* > * find the middle queue element if the queue has odd number of elements > * or the first element of the queue's second part otherwise > @@ -45,13 +49,13 @@ ngx_queue_middle(ngx_queue_t *queue) > } > > > -/* the stable insertion sort */ > +/* the stable merge sort */ > > void > ngx_queue_sort(ngx_queue_t *queue, > ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)) > { > - ngx_queue_t *q, *prev, *next; > + ngx_queue_t *q, tail; > > q = ngx_queue_head(queue); > > @@ -59,22 +63,44 @@ ngx_queue_sort(ngx_queue_t *queue, > return; > } > > - for (q = ngx_queue_next(q); q != ngx_queue_sentinel(queue); q = next) { > + q = ngx_queue_middle(queue); > + > + ngx_queue_split(queue, q, &tail); > + > + ngx_queue_sort(queue, cmp); > + ngx_queue_sort(&tail, cmp); > + > + ngx_queue_merge(queue, &tail, cmp); > +} > > - prev = ngx_queue_prev(q); > - next = ngx_queue_next(q); > > - ngx_queue_remove(q); > +static void > +ngx_queue_merge(ngx_queue_t *queue, ngx_queue_t *tail, > + ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)) > +{ > + ngx_queue_t *q1, *q2; > + > + q1 = ngx_queue_head(queue); > + q2 = ngx_queue_head(tail); > > - do { > - if (cmp(prev, q) <= 0) { > - break; > - } > + for ( ;; ) { > + if (q1 == ngx_queue_sentinel(queue)) { > + ngx_queue_add(queue, tail); > + break; > + } > + > + if (q2 == ngx_queue_sentinel(tail)) { > + break; > + } > > - prev = ngx_queue_prev(prev); > + if (cmp(q1, q2) <= 0) { > + q1 = ngx_queue_next(q1); > + continue; > + } > > - } while (prev != ngx_queue_sentinel(queue)); > + ngx_queue_remove(q2); > + ngx_queue_insert_before(q1, q2); > > - ngx_queue_insert_after(prev, q); > + q2 = ngx_queue_head(tail); > } > } > diff --git a/src/core/ngx_queue.h b/src/core/ngx_queue.h > --- a/src/core/ngx_queue.h > +++ b/src/core/ngx_queue.h > @@ -47,6 +47,9 @@ struct ngx_queue_s { > (h)->prev = x > > > +#define ngx_queue_insert_before ngx_queue_insert_tail > + > + > #define ngx_queue_head(h) > \ > (h)->next > > > > > > > > > > 2023年9月30日(土) 12:38 Maxim Dounin <[email protected]>: > > > > > > Hello! > > > > > > On Fri, Sep 22, 2023 at 03:58:41PM +0900, Yusuke Nojima wrote: > > > > > > > # HG changeset patch > > > > # User Yusuke Nojima <[email protected]> > > > > # Date 1679555707 -32400 > > > > # Thu Mar 23 16:15:07 2023 +0900 > > > > # Node ID 6aac98fb135e47ca9cf7ad7d780cf4a10e9aa55c > > > > # Parent 8771d35d55d0a2b1cefaab04401d6f837f5a05a2 > > > > Improve performance when starting nginx with a lot of locations > > > > > > > > Our team has a configuration file with a very large number of > > > > locations, and we found that starting nginx with this file takes an > > > > unacceptable amount of time. After investigating the issue, we > > > > discovered that the root cause of the long startup time is the sorting > > > > of the location list. > > > > > > Interesting. > > > > > > Could you please provide some more details about the use case, > > > such as how locations are used, and what is the approximate number > > > of locations being used? > > > > > > In my practice, it is extremely uncommon to use more than 1k-10k > > > prefix locations (and even these numbers are huge for normal > > > setups). Further, since each location contains configuration for > > > all modules, such configurations are expected to require a lot of > > > memory (currently about 5-10KB per empty location, so about > > > 50-100MB per 10k locations, and 0.5-1G per 100k locations). > > > Accordingly, using other approaches such as map (assuming exact > > > match is actually needed) might be beneficial regardless of the > > > sorting costs. > > > > > > Nevertheless, swapping the sorting algorithm to a faster one looks > > > like an obvious improvement. > > > > > > > > > > > Currently, the sorting algorithm used in nginx is insertion sort, > > > > which requires O(n^2) time for n locations. We have modified the > > > > sorting algorithm to use merge sort instead, which has a time > > > > complexity of O(n log n). > > > > > > > > We have tested the modified code using micro-benchmarks and confirmed > > > > that the new algorithm improves nginx startup time significantly > > > > (shown below). We believe that this change would be valuable for other > > > > users who are experiencing similar issues. > > > > > > > > > > > > Table: nginx startup time in seconds > > > > > > > > n current patched > > > > 2000 0.033 0.018 > > > > 4000 0.047 0.028 > > > > 6000 0.062 0.038 > > > > 8000 0.079 0.050 > > > > 10000 0.091 0.065 > > > > 12000 0.367 0.081 > > > > 14000 0.683 0.086 > > > > 16000 0.899 0.097 > > > > 18000 1.145 0.110 > > > > 20000 1.449 0.122 > > > > 22000 1.650 0.137 > > > > 24000 2.155 0.151 > > > > 26000 3.096 0.155 > > > > 28000 3.711 0.168 > > > > 30000 3.539 0.184 > > > > 32000 3.980 0.193 > > > > 34000 4.543 0.208 > > > > 36000 4.349 0.217 > > > > 38000 5.021 0.229 > > > > 40000 4.918 0.245 > > > > 42000 4.835 0.256 > > > > 44000 5.159 0.262 > > > > 46000 5.802 0.331 > > > > 48000 6.205 0.295 > > > > 50000 5.701 0.308 > > > > 52000 5.992 0.335 > > > > 54000 6.561 0.323 > > > > 56000 6.856 0.333 > > > > 58000 6.515 0.347 > > > > 60000 7.051 0.359 > > > > 62000 6.956 0.377 > > > > 64000 7.376 0.376 > > > > 66000 7.506 0.404 > > > > 68000 7.292 0.407 > > > > 70000 7.422 0.461 > > > > 72000 10.090 0.443 > > > > 74000 18.505 0.463 > > > > 76000 11.857 0.462 > > > > 78000 9.752 0.470 > > > > 80000 12.485 0.481 > > > > 82000 11.027 0.498 > > > > 84000 9.804 0.523 > > > > 86000 8.482 0.515 > > > > 88000 9.838 0.560 > > > > 90000 12.341 0.546 > > > > 92000 13.881 0.648 > > > > 94000 8.309 0.635 > > > > 96000 8.854 0.650 > > > > 98000 12.871 0.674 > > > > 100000 8.261 0.698 > > > > > > This probably can be reduced to something like 3-5 data points. > > > > > > > > > > > diff -r 8771d35d55d0 -r 6aac98fb135e src/core/ngx_queue.c > > > > --- a/src/core/ngx_queue.c Fri Mar 10 07:43:50 2023 +0300 > > > > +++ b/src/core/ngx_queue.c Thu Mar 23 16:15:07 2023 +0900 > > > > @@ -45,36 +45,103 @@ > > > > } > > > > > > > > > > > > -/* the stable insertion sort */ > > > > +/* merge queue2 into queue1. queue2 becomes empty after merge. */ > > > > + > > > > +static void > > > > +ngx_queue_merge(ngx_queue_t *queue1, ngx_queue_t *queue2, > > > > + ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)) > > > > +{ > > > > + ngx_queue_t *p1, *p2; > > > > > > Nitpicking: there are various style issues here and in other places. > > > > > > > + > > > > + p1 = ngx_queue_head(queue1); > > > > + p2 = ngx_queue_head(queue2); > > > > + > > > > + while (p1 != ngx_queue_sentinel(queue1) > > > > + && p2 != ngx_queue_sentinel(queue2)) { > > > > + > > > > + if (cmp(p1, p2) > 0) { > > > > + ngx_queue_t *next, *prev; > > > > + > > > > + next = ngx_queue_next(p2); > > > > + ngx_queue_remove(p2); > > > > + prev = ngx_queue_prev(p1); > > > > + ngx_queue_insert_after(prev, p2); > > > > + p2 = next; > > > > > > Nitpicking: there is no need to preserve "next" here, since p2 is > > > always the head of queue2 and, and the next element can be > > > obtained by ngx_queue_head(queue2). > > > > > > Also, instead of obtaining "prev" and using > > > ngx_queue_insert_after() it would be easier to use > > > ngx_queue_insert_before(). It is not currently defined, but it is > > > trivial to define one: it is an alias to ngx_queue_insert_tail(), > > > much like ngx_queue_insert_after() is an alias to > > > ngx_queue_insert_head(). > > > > > > > + } else { > > > > + p1 = ngx_queue_next(p1); > > > > + } > > > > + } > > > > + if (p2 != ngx_queue_sentinel(queue2)) { > > > > + ngx_queue_add(queue1, queue2); > > > > + ngx_queue_init(queue2); > > > > + } > > > > +} > > > > + > > > > + > > > > +/* move all elements from src to dest. dest should be empty before > > > > call. */ > > > > + > > > > +static void > > > > +ngx_queue_move(ngx_queue_t *dest, ngx_queue_t *src) > > > > +{ > > > > + *dest = *src; > > > > + ngx_queue_init(src); > > > > + > > > > + if (dest->next == src) { > > > > + dest->next = dest; > > > > + } else { > > > > + dest->next->prev = dest; > > > > + } > > > > + if (dest->prev == src) { > > > > + dest->prev = dest; > > > > + } else { > > > > + dest->prev->next = dest; > > > > + } > > > > +} > > > > > > This function looks strange to me. There is the ngx_queue_add() > > > macro, which probably should be used instead (if needed). > > > > > > > + > > > > + > > > > +/* the stable merge sort */ > > > > > > > > void > > > > ngx_queue_sort(ngx_queue_t *queue, > > > > ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)) > > > > { > > > > - ngx_queue_t *q, *prev, *next; > > > > + ngx_queue_t merged[64], *p, *last; > > > > > > > > - q = ngx_queue_head(queue); > > > > - > > > > - if (q == ngx_queue_last(queue)) { > > > > + if (ngx_queue_head(queue) == ngx_queue_last(queue)) { > > > > return; > > > > } > > > > > > > > - for (q = ngx_queue_next(q); q != ngx_queue_sentinel(queue); q = > > > > next) { > > > > + last = merged; > > > > > > > > - prev = ngx_queue_prev(q); > > > > - next = ngx_queue_next(q); > > > > + while (!ngx_queue_empty(queue)) { > > > > + /* > > > > + * Loop invariant: > > > > + * merged[i] must have exactly 0 or 2^i elements in sorted > > > > order. > > > > + * For each iteration, we take one element from the given > > > > queue and > > > > + * insert it into merged without violating the invariant > > > > condition. > > > > + */ > > > > > > > > - ngx_queue_remove(q); > > > > + ngx_queue_t carry, *h; > > > > + > > > > + h = ngx_queue_head(queue); > > > > + ngx_queue_remove(h); > > > > + ngx_queue_init(&carry); > > > > + ngx_queue_insert_head(&carry, h); > > > > > > > > - do { > > > > - if (cmp(prev, q) <= 0) { > > > > - break; > > > > - } > > > > + for (p = merged; p != last && !ngx_queue_empty(p); p++) { > > > > + ngx_queue_merge(p, &carry, cmp); > > > > + ngx_queue_move(&carry, p); > > > > + } > > > > + if (p == last) { > > > > + ngx_queue_init(last); > > > > + last++; > > > > + } > > > > + ngx_queue_move(p, &carry); > > > > + } > > > > > > > > - prev = ngx_queue_prev(prev); > > > > - > > > > - } while (prev != ngx_queue_sentinel(queue)); > > > > - > > > > - ngx_queue_insert_after(prev, q); > > > > + /* Merge all queues into one queue */ > > > > + for (p = merged + 1; p != last; p++) { > > > > + ngx_queue_merge(p, p-1, cmp); > > > > } > > > > + ngx_queue_move(queue, last-1); > > > > } > > > > > > While bottom-up merge sort implementation might be more efficient, > > > I find it disturbing to use fixed array of queues without any > > > checks if we are within the array bounds. > > > > > > Rather, I would suggest recursive top-bottom merge sort implementation > > > instead, which is much simpler and uses stack as temporary storage > > > (so it'll naturally die if there will be a queue which requires > > > more space for sorting than we have). > > > > > > Please take a look if it works for you: > > > > > > diff --git a/src/core/ngx_queue.c b/src/core/ngx_queue.c > > > --- a/src/core/ngx_queue.c > > > +++ b/src/core/ngx_queue.c > > > @@ -9,6 +9,10 @@ > > > #include <ngx_core.h> > > > > > > > > > +static void ngx_queue_merge(ngx_queue_t *queue, ngx_queue_t *tail, > > > + ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)); > > > + > > > + > > > /* > > > * find the middle queue element if the queue has odd number of elements > > > * or the first element of the queue's second part otherwise > > > @@ -45,13 +49,13 @@ ngx_queue_middle(ngx_queue_t *queue) > > > } > > > > > > > > > -/* the stable insertion sort */ > > > +/* the stable merge sort */ > > > > > > void > > > ngx_queue_sort(ngx_queue_t *queue, > > > ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)) > > > { > > > - ngx_queue_t *q, *prev, *next; > > > + ngx_queue_t *q, tail; > > > > > > q = ngx_queue_head(queue); > > > > > > @@ -59,22 +63,44 @@ ngx_queue_sort(ngx_queue_t *queue, > > > return; > > > } > > > > > > - for (q = ngx_queue_next(q); q != ngx_queue_sentinel(queue); q = > > > next) { > > > + q = ngx_queue_middle(queue); > > > + > > > + ngx_queue_split(queue, q, &tail); > > > + > > > + ngx_queue_sort(queue, cmp); > > > + ngx_queue_sort(&tail, cmp); > > > + > > > + ngx_queue_merge(queue, &tail, cmp); > > > +} > > > > > > - prev = ngx_queue_prev(q); > > > - next = ngx_queue_next(q); > > > > > > - ngx_queue_remove(q); > > > +static void > > > +ngx_queue_merge(ngx_queue_t *queue, ngx_queue_t *tail, > > > + ngx_int_t (*cmp)(const ngx_queue_t *, const ngx_queue_t *)) > > > +{ > > > + ngx_queue_t *q1, *q2; > > > + > > > + q1 = ngx_queue_head(queue); > > > + q2 = ngx_queue_head(tail); > > > > > > - do { > > > - if (cmp(prev, q) <= 0) { > > > - break; > > > - } > > > + for ( ;; ) { > > > + if (q1 == ngx_queue_sentinel(queue)) { > > > + ngx_queue_add(queue, tail); > > > + break; > > > + } > > > + > > > + if (q2 == ngx_queue_sentinel(tail)) { > > > + break; > > > + } > > > > > > - prev = ngx_queue_prev(prev); > > > + if (cmp(q1, q2) <= 0) { > > > + q1 = ngx_queue_next(q1); > > > + continue; > > > + } > > > > > > - } while (prev != ngx_queue_sentinel(queue)); > > > + ngx_queue_remove(q2); > > > + ngx_queue_insert_before(q1, q2); > > > > > > - ngx_queue_insert_after(prev, q); > > > + q2 = ngx_queue_head(tail); > > > } > > > } > > > diff --git a/src/core/ngx_queue.h b/src/core/ngx_queue.h > > > --- a/src/core/ngx_queue.h > > > +++ b/src/core/ngx_queue.h > > > @@ -47,6 +47,9 @@ struct ngx_queue_s { > > > (h)->prev = x > > > > > > > > > +#define ngx_queue_insert_before ngx_queue_insert_tail > > > + > > > + > > > #define ngx_queue_head(h) > > > \ > > > (h)->next > > > > > > > > > > > > -- > > > Maxim Dounin > > > http://mdounin.ru/ > > > _______________________________________________ > > > nginx-devel mailing list > > > [email protected] > > > https://mailman.nginx.org/mailman/listinfo/nginx-devel > > _______________________________________________ > > nginx-devel mailing list > > [email protected] > > https://mailman.nginx.org/mailman/listinfo/nginx-devel > > -- > Maxim Dounin > http://mdounin.ru/ > _______________________________________________ > nginx-devel mailing list > [email protected] > https://mailman.nginx.org/mailman/listinfo/nginx-devel _______________________________________________ nginx-devel mailing list [email protected] https://mailman.nginx.org/mailman/listinfo/nginx-devel
