This is a multi-part message in MIME format. --------------0F1DE22CD9273A65E1AE5118 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit
Howard Chu wrote: > Henrik Bohnenkamp wrote: >> On Mon, Jul 15, 2019 at 02:26:59PM +0100, Howard Chu wrote: >>> >>> Fyi, on our problematic test database with 11M entries and 3.7M aliases, a >>> search with -a always , starting from the >>> DB suffix, took 4 minutes without this patch, and 1235 minutes with this >>> patch. >>> >>> Needless to say, that's not looking good. Still checking other test cases. >> >> Interesting, so the behavior is reversed now :-). I assume you have >> found an alternative approach to solve the problem. That's fine with >> me, I want the problem solved, not my patch integrated. I'm of course >> interested in how you do it. Surely you did not get the 4 minutes with >> a stock 2.4.48 slapd? > > For this size of DB we needed the ITS#8977 patches to accommodate larger IDLs. > (I used 24 bits for IDLs, 16.7M slots) > Also at this size, the IDL processing itself is the main bottleneck now. We > would > need to switch to bitmaps or trees to avoid this bottleneck, but that's also a > much larger change than we can consider for this release. > I've set up a more modest test database along the lines of ITS#7657. It has 500,000 users, 30,000 aliases total, and 435 in ou=alias2 (all the rest under ou=alias1). For unpatched back-mdb: time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias1,dc=example,dc=com -a always # search result search: 2 result: 0 Success # numResponses: 29567 # numEntries: 29566 real 0m42.504s user 0m1.344s sys 0m2.996s time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias2,dc=example,dc=com -a always # search result search: 2 result: 0 Success # numResponses: 437 # numEntries: 436 real 0m48.406s user 0m0.040s sys 0m0.076s For back-mdb with e90e8c7d3c12d897bb0584ba04dc519d4f23acf9 time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias1,dc=example,dc=com -a always # search result search: 2 result: 0 Success # numResponses: 29567 # numEntries: 29566 real 0m5.500s user 0m1.516s sys 0m2.944s time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias2,dc=example,dc=com -a always # search result search: 2 result: 0 Success # numResponses: 437 # numEntries: 436 real 0m0.399s user 0m0.048s sys 0m0.060s For back-mdb with this ITS#8875 patch time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias1,dc=example,dc=com -a always # search result search: 2 result: 0 Success # numResponses: 29567 # numEntries: 29566 real 0m6.020s user 0m1.640s sys 0m3.372s time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias2,dc=example,dc=com -a always # search result search: 2 result: 0 Success # numResponses: 437 # numEntries: 436 real 0m0.203s user 0m0.052s sys 0m0.048s It seems close enough in this case (I didn't do enough repeated runs to average out any measurement error) while the committed patch performs better on the really ugly test case. The tool to generate the test LDIF is attached. It reads an LDIF containing 500,000 users on stdin, and outputs the same LDIF, with aliases interspersed, on stdout. -- -- Howard Chu CTO, Symas Corp. http://www.symas.com Director, Highland Sun http://highlandsun.com/hyc/ Chief Architect, OpenLDAP http://www.openldap.org/project/ --------------0F1DE22CD9273A65E1AE5118 Content-Type: text/x-csrc; name="mkalias.c" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="mkalias.c" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #define NUMUSERS 500000 #define NUMALIASES 30000 #define NUMSUBALIASES 435 static const char ou1[] = "\ dn: ou=alias1,dc=example,dc=com\n\ objectclass: top\n\ objectclass: organizationalUnit\n\ ou: alias1\n"; static const char ou2[] = "\ dn: ou=alias2,dc=example,dc=com\n\ objectclass: top\n\ objectclass: organizationalUnit\n\ ou: alias2\n"; int qcmp(const void *a, const void *b) { const int *i = a, *j = b; return *i - *j; } int main() { char line[1024]; int aliases[NUMALIASES]; int subaliases[NUMSUBALIASES]; int i; int gotuser = 0, nusers=0, naliases=0, nsubaliases=0; /* select a random subset of users to generate aliases */ srand(time(0L)); for (i=0; i<NUMALIASES; i++) { aliases[i] = rand() % NUMUSERS; } uniq1: qsort(aliases, NUMALIASES, sizeof(int), qcmp); /* make sure they're unique */ for (i=1; i<NUMALIASES; i++) { if (aliases[i-1] == aliases[i]) { aliases[i] = rand() % NUMUSERS; goto uniq1; } } /* select a radnom susbset of aliases for the target subtree */ for (i=0; i<NUMSUBALIASES; i++) { subaliases[i] = rand() % NUMALIASES; } uniq2: qsort(subaliases, NUMSUBALIASES, sizeof(int), qcmp); /* make sure they're unique */ for (i=1; i<NUMSUBALIASES; i++) { if (subaliases[i-1] == subaliases[i]) { subaliases[i] = rand() % NUMALIASES; goto uniq2; } } for (i=0; i<NUMSUBALIASES; i++) { subaliases[i] = aliases[subaliases[i]]; } while (fgets(line, sizeof(line), stdin) != NULL) { if (line[0] == '#') continue; if (!strncmp(line, "dn: ", 4)) { if (!gotuser) { if (!strncmp(line+4, "uid=", 4)) { gotuser = 1; puts(ou1); puts(ou2); } } else { if (nusers == aliases[naliases] || nusers == subaliases[nsubaliases]) { char *ou; int id; sscanf(line, "dn: uid=user.%d,", &id); if (nusers == subaliases[nsubaliases]) { ou = "alias2"; nsubaliases++; } else { ou = "alias1"; } naliases++; printf("dn: uid=x.%d,ou=%s,dc=example,dc=com\n", id, ou); puts("objectclass: alias"); puts("objectclass: extensibleObject"); printf("aliasedObjectName: %s\n", line+4); } nusers++; } } fputs(line, stdout); } return 0; } --------------0F1DE22CD9273A65E1AE5118--
