Module Name: src
Committed By: rillig
Date: Sun Apr 11 12:46:55 UTC 2021
Modified Files:
src/usr.bin/make: hash.c hash.h var.c
Log Message:
make: avoid allocating memory for simple variable names
The main change is in ParseVarname, where a Buffer is replaced with the
newly introduced LazyBuf. LazyBuf is inspired by
https://golang.org/src/path/path.go.
In CanonicalVarname, the pre-comparison of the first letter of the
variable name is no longer necessary. GCC 9 optimizes a fixed-length
memcmp so well that the code can finally be written to target human
readers, leaving the optimization to the compiler.
To generate a diff of this commit:
cvs rdiff -u -r1.63 -r1.64 src/usr.bin/make/hash.c
cvs rdiff -u -r1.39 -r1.40 src/usr.bin/make/hash.h
cvs rdiff -u -r1.915 -r1.916 src/usr.bin/make/var.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/usr.bin/make/hash.c
diff -u src/usr.bin/make/hash.c:1.63 src/usr.bin/make/hash.c:1.64
--- src/usr.bin/make/hash.c:1.63 Sat Apr 3 14:39:02 2021
+++ src/usr.bin/make/hash.c Sun Apr 11 12:46:54 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: hash.c,v 1.63 2021/04/03 14:39:02 rillig Exp $ */
+/* $NetBSD: hash.c,v 1.64 2021/04/11 12:46:54 rillig Exp $ */
/*
* Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -74,7 +74,7 @@
#include "make.h"
/* "@(#)hash.c 8.1 (Berkeley) 6/6/93" */
-MAKE_RCSID("$NetBSD: hash.c,v 1.63 2021/04/03 14:39:02 rillig Exp $");
+MAKE_RCSID("$NetBSD: hash.c,v 1.64 2021/04/11 12:46:54 rillig Exp $");
/*
* The ratio of # entries to # buckets at which we rebuild the table to
@@ -84,7 +84,7 @@ MAKE_RCSID("$NetBSD: hash.c,v 1.63 2021/
/* This hash function matches Gosling's Emacs and java.lang.String. */
static unsigned int
-hash(const char *key, size_t *out_keylen)
+Hash_String(const char *key, size_t *out_keylen)
{
unsigned int h;
const char *p;
@@ -98,10 +98,17 @@ hash(const char *key, size_t *out_keylen
return h;
}
+/* This hash function matches Gosling's Emacs and java.lang.String. */
unsigned int
-Hash_Hash(const char *key)
+Hash_Substring(Substring key)
{
- return hash(key, NULL);
+ unsigned int h;
+ const char *p;
+
+ h = 0;
+ for (p = key.start; p != key.end; p++)
+ h = 31 * h + (unsigned char)*p;
+ return h;
}
static HashEntry *
@@ -126,6 +133,41 @@ HashTable_Find(HashTable *t, unsigned in
return e;
}
+static bool
+HashEntry_KeyEquals(const HashEntry *he, Substring key)
+{
+ const char *heKey, *p;
+
+ heKey = he->key;
+ for (p = key.start; p != key.end; p++, heKey++)
+ if (*p != *heKey || *heKey == '\0')
+ return false;
+ return *heKey == '\0';
+}
+
+static HashEntry *
+HashTable_FindEntryBySubstring(HashTable *t, Substring key, unsigned int h)
+{
+ HashEntry *e;
+ unsigned int chainlen = 0;
+
+#ifdef DEBUG_HASH_LOOKUP
+ DEBUG4(HASH, "%s: %p h=%08x key=%.*s\n", __func__, t, h,
+ (int)Substring_Length(key), key.start);
+#endif
+
+ for (e = t->buckets[h & t->bucketsMask]; e != NULL; e = e->next) {
+ chainlen++;
+ if (e->key_hash == h && HashEntry_KeyEquals(e, key))
+ break;
+ }
+
+ if (chainlen > t->maxchain)
+ t->maxchain = chainlen;
+
+ return e;
+}
+
/* Set up the hash table. */
void
HashTable_Init(HashTable *t)
@@ -171,7 +213,7 @@ HashTable_Done(HashTable *t)
HashEntry *
HashTable_FindEntry(HashTable *t, const char *key)
{
- unsigned int h = hash(key, NULL);
+ unsigned int h = Hash_String(key, NULL);
return HashTable_Find(t, h, key);
}
@@ -188,9 +230,9 @@ HashTable_FindValue(HashTable *t, const
* or return NULL.
*/
void *
-HashTable_FindValueHash(HashTable *t, const char *key, unsigned int h)
+HashTable_FindValueBySubstringHash(HashTable *t, Substring key, unsigned int h)
{
- HashEntry *he = HashTable_Find(t, h, key);
+ HashEntry *he = HashTable_FindEntryBySubstring(t, key, h);
return he != NULL ? he->value : NULL;
}
@@ -239,7 +281,7 @@ HashEntry *
HashTable_CreateEntry(HashTable *t, const char *key, bool *out_isNew)
{
size_t keylen;
- unsigned int h = hash(key, &keylen);
+ unsigned int h = Hash_String(key, &keylen);
HashEntry *he = HashTable_Find(t, h, key);
if (he != NULL) {
Index: src/usr.bin/make/hash.h
diff -u src/usr.bin/make/hash.h:1.39 src/usr.bin/make/hash.h:1.40
--- src/usr.bin/make/hash.h:1.39 Sat Apr 3 11:08:40 2021
+++ src/usr.bin/make/hash.h Sun Apr 11 12:46:54 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: hash.h,v 1.39 2021/04/03 11:08:40 rillig Exp $ */
+/* $NetBSD: hash.h,v 1.40 2021/04/11 12:46:54 rillig Exp $ */
/*
* Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -124,8 +124,8 @@ void HashTable_Init(HashTable *);
void HashTable_Done(HashTable *);
HashEntry *HashTable_FindEntry(HashTable *, const char *);
void *HashTable_FindValue(HashTable *, const char *);
-unsigned int Hash_Hash(const char *);
-void *HashTable_FindValueHash(HashTable *, const char *, unsigned int);
+unsigned int Hash_Substring(Substring);
+void *HashTable_FindValueBySubstringHash(HashTable *, Substring, unsigned int);
HashEntry *HashTable_CreateEntry(HashTable *, const char *, bool *);
HashEntry *HashTable_Set(HashTable *, const char *, void *);
void HashTable_DeleteEntry(HashTable *, HashEntry *);
Index: src/usr.bin/make/var.c
diff -u src/usr.bin/make/var.c:1.915 src/usr.bin/make/var.c:1.916
--- src/usr.bin/make/var.c:1.915 Sat Apr 10 22:40:34 2021
+++ src/usr.bin/make/var.c Sun Apr 11 12:46:54 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: var.c,v 1.915 2021/04/10 22:40:34 rillig Exp $ */
+/* $NetBSD: var.c,v 1.916 2021/04/11 12:46:54 rillig Exp $ */
/*
* Copyright (c) 1988, 1989, 1990, 1993
@@ -140,7 +140,7 @@
#include "metachar.h"
/* "@(#)var.c 8.3 (Berkeley) 3/19/94" */
-MAKE_RCSID("$NetBSD: var.c,v 1.915 2021/04/10 22:40:34 rillig Exp $");
+MAKE_RCSID("$NetBSD: var.c,v 1.916 2021/04/11 12:46:54 rillig Exp $");
/*
* Variables are defined using one of the VAR=value assignments. Their
@@ -345,45 +345,30 @@ VarNew(FStr name, const char *value, boo
return var;
}
-static const char *
-CanonicalVarname(const char *name)
+static Substring
+CanonicalVarname(Substring name)
{
- if (*name == '.' && ch_isupper(name[1])) {
- switch (name[1]) {
- case 'A':
- if (strcmp(name, ".ALLSRC") == 0)
- name = ALLSRC;
- if (strcmp(name, ".ARCHIVE") == 0)
- name = ARCHIVE;
- break;
- case 'I':
- if (strcmp(name, ".IMPSRC") == 0)
- name = IMPSRC;
- break;
- case 'M':
- if (strcmp(name, ".MEMBER") == 0)
- name = MEMBER;
- break;
- case 'O':
- if (strcmp(name, ".OODATE") == 0)
- name = OODATE;
- break;
- case 'P':
- if (strcmp(name, ".PREFIX") == 0)
- name = PREFIX;
- break;
- case 'S':
- if (strcmp(name, ".SHELL") == 0) {
- if (shellPath == NULL)
- Shell_Init();
- }
- break;
- case 'T':
- if (strcmp(name, ".TARGET") == 0)
- name = TARGET;
- break;
- }
- }
+
+ if (!(Substring_Length(name) > 0 && name.start[0] == '.'))
+ return name;
+
+ if (Substring_Equals(name, ".ALLSRC"))
+ return Substring_InitStr(ALLSRC);
+ if (Substring_Equals(name, ".ARCHIVE"))
+ return Substring_InitStr(ARCHIVE);
+ if (Substring_Equals(name, ".IMPSRC"))
+ return Substring_InitStr(IMPSRC);
+ if (Substring_Equals(name, ".MEMBER"))
+ return Substring_InitStr(MEMBER);
+ if (Substring_Equals(name, ".OODATE"))
+ return Substring_InitStr(OODATE);
+ if (Substring_Equals(name, ".PREFIX"))
+ return Substring_InitStr(PREFIX);
+ if (Substring_Equals(name, ".TARGET"))
+ return Substring_InitStr(TARGET);
+
+ if (Substring_Equals(name, ".SHELL") && shellPath == NULL)
+ Shell_Init();
/* GNU make has an additional alias $^ == ${.ALLSRC}. */
@@ -391,9 +376,9 @@ CanonicalVarname(const char *name)
}
static Var *
-GNode_FindVar(GNode *scope, const char *varname, unsigned int hash)
+GNode_FindVar(GNode *scope, Substring varname, unsigned int hash)
{
- return HashTable_FindValueHash(&scope->vars, varname, hash);
+ return HashTable_FindValueBySubstringHash(&scope->vars, varname, hash);
}
/*
@@ -410,14 +395,14 @@ GNode_FindVar(GNode *scope, const char *
* VarFreeEnv after use.
*/
static Var *
-VarFind(const char *name, GNode *scope, bool elsewhere)
+VarFindSubstring(Substring name, GNode *scope, bool elsewhere)
{
Var *var;
unsigned int nameHash;
/* Replace '.TARGET' with '@', likewise for other local variables. */
name = CanonicalVarname(name);
- nameHash = Hash_Hash(name);
+ nameHash = Hash_Substring(name);
var = GNode_FindVar(scope, name, nameHash);
if (!elsewhere)
@@ -435,17 +420,19 @@ VarFind(const char *name, GNode *scope,
}
if (var == NULL) {
- char *env;
+ FStr envName;
+ const char *envValue;
/*
* TODO: try setting an environment variable with the empty
* name, which should be technically possible, just to see
* how make reacts. All .for loops should be broken then.
*/
- if ((env = getenv(name)) != NULL) {
- char *varname = bmake_strdup(name);
- return VarNew(FStr_InitOwn(varname), env, true, false);
- }
+ envName = Substring_Str(name);
+ envValue = getenv(envName.str);
+ if (envValue != NULL)
+ return VarNew(envName, envValue, true, false);
+ FStr_Done(&envName);
if (opts.checkEnvFirst && scope != SCOPE_GLOBAL) {
var = GNode_FindVar(SCOPE_GLOBAL, name, nameHash);
@@ -461,6 +448,13 @@ VarFind(const char *name, GNode *scope,
return var;
}
+/* TODO: Replace these calls with VarFindSubstring, as far as possible. */
+static Var *
+VarFind(const char *name, GNode *scope, bool elsewhere)
+{
+ return VarFindSubstring(Substring_InitStr(name), scope, elsewhere);
+}
+
/* If the variable is an environment variable, free it, including its value. */
static void
VarFreeEnv(Var *v)
@@ -4006,12 +4000,17 @@ cleanup:
}
/*
- * Only four of the local variables are treated specially as they are the
- * only four that will be set when dynamic sources are expanded.
+ * Only 4 of the 7 local variables are treated specially as they are the only
+ * ones that will be set when dynamic sources are expanded.
*/
static bool
-VarnameIsDynamic(const char *name, size_t len)
+VarnameIsDynamic(Substring varname)
{
+ const char *name;
+ size_t len;
+
+ name = varname.start;
+ len = Substring_Length(varname);
if (len == 1 || (len == 2 && (name[1] == 'F' || name[1] == 'D'))) {
switch (name[0]) {
case '@':
@@ -4024,10 +4023,10 @@ VarnameIsDynamic(const char *name, size_
}
if ((len == 7 || len == 8) && name[0] == '.' && ch_isupper(name[1])) {
- return strcmp(name, ".TARGET") == 0 ||
- strcmp(name, ".ARCHIVE") == 0 ||
- strcmp(name, ".PREFIX") == 0 ||
- strcmp(name, ".MEMBER") == 0;
+ return Substring_Equals(varname, ".TARGET") ||
+ Substring_Equals(varname, ".ARCHIVE") ||
+ Substring_Equals(varname, ".PREFIX") ||
+ Substring_Equals(varname, ".MEMBER");
}
return false;
@@ -4064,16 +4063,15 @@ UndefinedShortVarValue(char varname, con
* Parse a variable name, until the end character or a colon, whichever
* comes first.
*/
-static char *
+static void
ParseVarname(const char **pp, char startc, char endc,
GNode *scope, VarEvalMode emode,
- size_t *out_varname_len)
+ LazyBuf *buf)
{
- Buffer buf;
const char *p = *pp;
int depth = 0; /* Track depth so we can spot parse errors. */
- Buf_Init(&buf);
+ LazyBuf_Init(buf, Substring_InitStr(p));
while (*p != '\0') {
if ((*p == endc || *p == ':') && depth == 0)
@@ -4088,16 +4086,14 @@ ParseVarname(const char **pp, char start
FStr nested_val;
(void)Var_Parse(&p, scope, emode, &nested_val);
/* TODO: handle errors */
- Buf_AddStr(&buf, nested_val.str);
+ LazyBuf_AddStr(buf, nested_val.str);
FStr_Done(&nested_val);
} else {
- Buf_AddByte(&buf, *p);
+ LazyBuf_Add(buf, *p);
p++;
}
}
*pp = p;
- *out_varname_len = buf.len;
- return Buf_DoneData(&buf);
}
static VarParseResult
@@ -4186,65 +4182,52 @@ ParseVarnameShort(char varname, const ch
/* Find variables like @F or <D. */
static Var *
-FindLocalLegacyVar(const char *varname, size_t namelen, GNode *scope,
+FindLocalLegacyVar(Substring varname, GNode *scope,
const char **out_extraModifiers)
{
+ Var *v;
+
/* Only resolve these variables if scope is a "real" target. */
if (scope == SCOPE_CMDLINE || scope == SCOPE_GLOBAL)
return NULL;
- if (namelen != 2)
+ if (Substring_Length(varname) != 2)
return NULL;
- if (varname[1] != 'F' && varname[1] != 'D')
+ if (varname.start[1] != 'F' && varname.start[1] != 'D')
return NULL;
- if (strchr("@%?*!<>", varname[0]) == NULL)
+ if (strchr("@%?*!<>", varname.start[0]) == NULL)
return NULL;
- {
- char name[2];
- Var *v;
+ v = VarFindSubstring(Substring_Sub(varname, 0, 1), scope, false);
+ if (v == NULL)
+ return NULL;
- name[0] = varname[0];
- name[1] = '\0';
- v = VarFind(name, scope, false);
-
- if (v != NULL) {
- if (varname[1] == 'D') {
- *out_extraModifiers = "H:";
- } else { /* F */
- *out_extraModifiers = "T:";
- }
- }
- return v;
- }
+ *out_extraModifiers = varname.start[1] == 'D' ? "H:" : "T:";
+ return v;
}
static VarParseResult
-EvalUndefined(bool dynamic, const char *start, const char *p, char *varname,
- VarEvalMode emode,
- FStr *out_val)
+EvalUndefined(bool dynamic, const char *start, const char *p,
+ Substring varname, VarEvalMode emode, FStr *out_val)
{
if (dynamic) {
*out_val = FStr_InitOwn(bmake_strsedup(start, p));
- free(varname);
return VPR_OK;
}
if (emode == VARE_UNDEFERR && opts.strict) {
Parse_Error(PARSE_FATAL,
- "Variable \"%s\" is undefined", varname);
- free(varname);
+ "Variable \"%.*s\" is undefined",
+ (int)Substring_Length(varname), varname.start);
*out_val = FStr_InitRefer(var_Error);
return VPR_ERR;
}
if (emode == VARE_UNDEFERR) {
- free(varname);
*out_val = FStr_InitRefer(var_Error);
return VPR_UNDEF; /* XXX: Should be VPR_ERR instead. */
}
- free(varname);
*out_val = FStr_InitRefer(varUndefined);
return VPR_OK;
}
@@ -4275,8 +4258,7 @@ ParseVarnameLong(
ExprDefined *out_true_exprDefined
)
{
- size_t namelen;
- char *varname;
+ LazyBuf varname;
Var *v;
bool haveModifier;
bool dynamic = false;
@@ -4285,28 +4267,30 @@ ParseVarnameLong(
char endc = startc == '(' ? ')' : '}';
p += 2; /* skip "${" or "$(" or "y(" */
- varname = ParseVarname(&p, startc, endc, scope, emode, &namelen);
+ ParseVarname(&p, startc, endc, scope, emode, &varname);
if (*p == ':') {
haveModifier = true;
} else if (*p == endc) {
haveModifier = false;
} else {
- Parse_Error(PARSE_FATAL, "Unclosed variable \"%s\"", varname);
- free(varname);
+ Substring name = LazyBuf_Get(&varname);
+ Parse_Error(PARSE_FATAL, "Unclosed variable \"%.*s\"",
+ (int)Substring_Length(name), name.start);
+ LazyBuf_Done(&varname);
*out_false_pp = p;
*out_false_val = FStr_InitRefer(var_Error);
*out_false_res = VPR_ERR;
return false;
}
- v = VarFind(varname, scope, true);
+ v = VarFindSubstring(LazyBuf_Get(&varname), scope, true);
/* At this point, p points just after the variable name,
* either at ':' or at endc. */
if (v == NULL) {
- v = FindLocalLegacyVar(varname, namelen, scope,
+ v = FindLocalLegacyVar(LazyBuf_Get(&varname), scope,
out_true_extraModifiers);
}
@@ -4315,14 +4299,14 @@ ParseVarnameLong(
* Defer expansion of dynamic variables if they appear in
* non-local scope since they are not defined there.
*/
- dynamic = VarnameIsDynamic(varname, namelen) &&
+ dynamic = VarnameIsDynamic(LazyBuf_Get(&varname)) &&
(scope == SCOPE_CMDLINE || scope == SCOPE_GLOBAL);
if (!haveModifier) {
p++; /* skip endc */
*out_false_pp = p;
*out_false_res = EvalUndefined(dynamic, start, p,
- varname, emode, out_false_val);
+ LazyBuf_Get(&varname), emode, out_false_val);
return false;
}
@@ -4340,10 +4324,10 @@ ParseVarnameLong(
* is still undefined, Var_Parse will return an empty string
* instead of the actually computed value.
*/
- v = VarNew(FStr_InitOwn(varname), "", false, false);
+ v = VarNew(LazyBuf_DoneGet(&varname), "", false, false);
*out_true_exprDefined = DEF_UNDEF;
} else
- free(varname);
+ LazyBuf_Done(&varname);
*out_true_endc = endc;
*out_true_p = p;