Module Name:    src
Committed By:   rillig
Date:           Sun Apr 11 12:46:55 UTC 2021

Modified Files:
        src/usr.bin/make: hash.c hash.h var.c

Log Message:
make: avoid allocating memory for simple variable names

The main change is in ParseVarname, where a Buffer is replaced with the
newly introduced LazyBuf.  LazyBuf is inspired by
https://golang.org/src/path/path.go.

In CanonicalVarname, the pre-comparison of the first letter of the
variable name is no longer necessary.  GCC 9 optimizes a fixed-length
memcmp so well that the code can finally be written to target human
readers, leaving the optimization to the compiler.


To generate a diff of this commit:
cvs rdiff -u -r1.63 -r1.64 src/usr.bin/make/hash.c
cvs rdiff -u -r1.39 -r1.40 src/usr.bin/make/hash.h
cvs rdiff -u -r1.915 -r1.916 src/usr.bin/make/var.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/make/hash.c
diff -u src/usr.bin/make/hash.c:1.63 src/usr.bin/make/hash.c:1.64
--- src/usr.bin/make/hash.c:1.63	Sat Apr  3 14:39:02 2021
+++ src/usr.bin/make/hash.c	Sun Apr 11 12:46:54 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: hash.c,v 1.63 2021/04/03 14:39:02 rillig Exp $	*/
+/*	$NetBSD: hash.c,v 1.64 2021/04/11 12:46:54 rillig Exp $	*/
 
 /*
  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -74,7 +74,7 @@
 #include "make.h"
 
 /*	"@(#)hash.c	8.1 (Berkeley) 6/6/93"	*/
-MAKE_RCSID("$NetBSD: hash.c,v 1.63 2021/04/03 14:39:02 rillig Exp $");
+MAKE_RCSID("$NetBSD: hash.c,v 1.64 2021/04/11 12:46:54 rillig Exp $");
 
 /*
  * The ratio of # entries to # buckets at which we rebuild the table to
@@ -84,7 +84,7 @@ MAKE_RCSID("$NetBSD: hash.c,v 1.63 2021/
 
 /* This hash function matches Gosling's Emacs and java.lang.String. */
 static unsigned int
-hash(const char *key, size_t *out_keylen)
+Hash_String(const char *key, size_t *out_keylen)
 {
 	unsigned int h;
 	const char *p;
@@ -98,10 +98,17 @@ hash(const char *key, size_t *out_keylen
 	return h;
 }
 
+/* This hash function matches Gosling's Emacs and java.lang.String. */
 unsigned int
-Hash_Hash(const char *key)
+Hash_Substring(Substring key)
 {
-	return hash(key, NULL);
+	unsigned int h;
+	const char *p;
+
+	h = 0;
+	for (p = key.start; p != key.end; p++)
+		h = 31 * h + (unsigned char)*p;
+	return h;
 }
 
 static HashEntry *
@@ -126,6 +133,41 @@ HashTable_Find(HashTable *t, unsigned in
 	return e;
 }
 
+static bool
+HashEntry_KeyEquals(const HashEntry *he, Substring key)
+{
+	const char *heKey, *p;
+
+	heKey = he->key;
+	for (p = key.start; p != key.end; p++, heKey++)
+		if (*p != *heKey || *heKey == '\0')
+			return false;
+	return *heKey == '\0';
+}
+
+static HashEntry *
+HashTable_FindEntryBySubstring(HashTable *t, Substring key, unsigned int h)
+{
+	HashEntry *e;
+	unsigned int chainlen = 0;
+
+#ifdef DEBUG_HASH_LOOKUP
+	DEBUG4(HASH, "%s: %p h=%08x key=%.*s\n", __func__, t, h,
+	    (int)Substring_Length(key), key.start);
+#endif
+
+	for (e = t->buckets[h & t->bucketsMask]; e != NULL; e = e->next) {
+		chainlen++;
+		if (e->key_hash == h && HashEntry_KeyEquals(e, key))
+			break;
+	}
+
+	if (chainlen > t->maxchain)
+		t->maxchain = chainlen;
+
+	return e;
+}
+
 /* Set up the hash table. */
 void
 HashTable_Init(HashTable *t)
@@ -171,7 +213,7 @@ HashTable_Done(HashTable *t)
 HashEntry *
 HashTable_FindEntry(HashTable *t, const char *key)
 {
-	unsigned int h = hash(key, NULL);
+	unsigned int h = Hash_String(key, NULL);
 	return HashTable_Find(t, h, key);
 }
 
@@ -188,9 +230,9 @@ HashTable_FindValue(HashTable *t, const 
  * or return NULL.
  */
 void *
-HashTable_FindValueHash(HashTable *t, const char *key, unsigned int h)
+HashTable_FindValueBySubstringHash(HashTable *t, Substring key, unsigned int h)
 {
-	HashEntry *he = HashTable_Find(t, h, key);
+	HashEntry *he = HashTable_FindEntryBySubstring(t, key, h);
 	return he != NULL ? he->value : NULL;
 }
 
@@ -239,7 +281,7 @@ HashEntry *
 HashTable_CreateEntry(HashTable *t, const char *key, bool *out_isNew)
 {
 	size_t keylen;
-	unsigned int h = hash(key, &keylen);
+	unsigned int h = Hash_String(key, &keylen);
 	HashEntry *he = HashTable_Find(t, h, key);
 
 	if (he != NULL) {

Index: src/usr.bin/make/hash.h
diff -u src/usr.bin/make/hash.h:1.39 src/usr.bin/make/hash.h:1.40
--- src/usr.bin/make/hash.h:1.39	Sat Apr  3 11:08:40 2021
+++ src/usr.bin/make/hash.h	Sun Apr 11 12:46:54 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: hash.h,v 1.39 2021/04/03 11:08:40 rillig Exp $	*/
+/*	$NetBSD: hash.h,v 1.40 2021/04/11 12:46:54 rillig Exp $	*/
 
 /*
  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -124,8 +124,8 @@ void HashTable_Init(HashTable *);
 void HashTable_Done(HashTable *);
 HashEntry *HashTable_FindEntry(HashTable *, const char *);
 void *HashTable_FindValue(HashTable *, const char *);
-unsigned int Hash_Hash(const char *);
-void *HashTable_FindValueHash(HashTable *, const char *, unsigned int);
+unsigned int Hash_Substring(Substring);
+void *HashTable_FindValueBySubstringHash(HashTable *, Substring, unsigned int);
 HashEntry *HashTable_CreateEntry(HashTable *, const char *, bool *);
 HashEntry *HashTable_Set(HashTable *, const char *, void *);
 void HashTable_DeleteEntry(HashTable *, HashEntry *);

Index: src/usr.bin/make/var.c
diff -u src/usr.bin/make/var.c:1.915 src/usr.bin/make/var.c:1.916
--- src/usr.bin/make/var.c:1.915	Sat Apr 10 22:40:34 2021
+++ src/usr.bin/make/var.c	Sun Apr 11 12:46:54 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: var.c,v 1.915 2021/04/10 22:40:34 rillig Exp $	*/
+/*	$NetBSD: var.c,v 1.916 2021/04/11 12:46:54 rillig Exp $	*/
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -140,7 +140,7 @@
 #include "metachar.h"
 
 /*	"@(#)var.c	8.3 (Berkeley) 3/19/94" */
-MAKE_RCSID("$NetBSD: var.c,v 1.915 2021/04/10 22:40:34 rillig Exp $");
+MAKE_RCSID("$NetBSD: var.c,v 1.916 2021/04/11 12:46:54 rillig Exp $");
 
 /*
  * Variables are defined using one of the VAR=value assignments.  Their
@@ -345,45 +345,30 @@ VarNew(FStr name, const char *value, boo
 	return var;
 }
 
-static const char *
-CanonicalVarname(const char *name)
+static Substring
+CanonicalVarname(Substring name)
 {
-	if (*name == '.' && ch_isupper(name[1])) {
-		switch (name[1]) {
-		case 'A':
-			if (strcmp(name, ".ALLSRC") == 0)
-				name = ALLSRC;
-			if (strcmp(name, ".ARCHIVE") == 0)
-				name = ARCHIVE;
-			break;
-		case 'I':
-			if (strcmp(name, ".IMPSRC") == 0)
-				name = IMPSRC;
-			break;
-		case 'M':
-			if (strcmp(name, ".MEMBER") == 0)
-				name = MEMBER;
-			break;
-		case 'O':
-			if (strcmp(name, ".OODATE") == 0)
-				name = OODATE;
-			break;
-		case 'P':
-			if (strcmp(name, ".PREFIX") == 0)
-				name = PREFIX;
-			break;
-		case 'S':
-			if (strcmp(name, ".SHELL") == 0) {
-				if (shellPath == NULL)
-					Shell_Init();
-			}
-			break;
-		case 'T':
-			if (strcmp(name, ".TARGET") == 0)
-				name = TARGET;
-			break;
-		}
-	}
+
+	if (!(Substring_Length(name) > 0 && name.start[0] == '.'))
+		return name;
+
+	if (Substring_Equals(name, ".ALLSRC"))
+		return Substring_InitStr(ALLSRC);
+	if (Substring_Equals(name, ".ARCHIVE"))
+		return Substring_InitStr(ARCHIVE);
+	if (Substring_Equals(name, ".IMPSRC"))
+		return Substring_InitStr(IMPSRC);
+	if (Substring_Equals(name, ".MEMBER"))
+		return Substring_InitStr(MEMBER);
+	if (Substring_Equals(name, ".OODATE"))
+		return Substring_InitStr(OODATE);
+	if (Substring_Equals(name, ".PREFIX"))
+		return Substring_InitStr(PREFIX);
+	if (Substring_Equals(name, ".TARGET"))
+		return Substring_InitStr(TARGET);
+
+	if (Substring_Equals(name, ".SHELL") && shellPath == NULL)
+		Shell_Init();
 
 	/* GNU make has an additional alias $^ == ${.ALLSRC}. */
 
@@ -391,9 +376,9 @@ CanonicalVarname(const char *name)
 }
 
 static Var *
-GNode_FindVar(GNode *scope, const char *varname, unsigned int hash)
+GNode_FindVar(GNode *scope, Substring varname, unsigned int hash)
 {
-	return HashTable_FindValueHash(&scope->vars, varname, hash);
+	return HashTable_FindValueBySubstringHash(&scope->vars, varname, hash);
 }
 
 /*
@@ -410,14 +395,14 @@ GNode_FindVar(GNode *scope, const char *
  *	VarFreeEnv after use.
  */
 static Var *
-VarFind(const char *name, GNode *scope, bool elsewhere)
+VarFindSubstring(Substring name, GNode *scope, bool elsewhere)
 {
 	Var *var;
 	unsigned int nameHash;
 
 	/* Replace '.TARGET' with '@', likewise for other local variables. */
 	name = CanonicalVarname(name);
-	nameHash = Hash_Hash(name);
+	nameHash = Hash_Substring(name);
 
 	var = GNode_FindVar(scope, name, nameHash);
 	if (!elsewhere)
@@ -435,17 +420,19 @@ VarFind(const char *name, GNode *scope, 
 	}
 
 	if (var == NULL) {
-		char *env;
+		FStr envName;
+		const char *envValue;
 
 		/*
 		 * TODO: try setting an environment variable with the empty
 		 *  name, which should be technically possible, just to see
 		 *  how make reacts.  All .for loops should be broken then.
 		 */
-		if ((env = getenv(name)) != NULL) {
-			char *varname = bmake_strdup(name);
-			return VarNew(FStr_InitOwn(varname), env, true, false);
-		}
+		envName = Substring_Str(name);
+		envValue = getenv(envName.str);
+		if (envValue != NULL)
+			return VarNew(envName, envValue, true, false);
+		FStr_Done(&envName);
 
 		if (opts.checkEnvFirst && scope != SCOPE_GLOBAL) {
 			var = GNode_FindVar(SCOPE_GLOBAL, name, nameHash);
@@ -461,6 +448,13 @@ VarFind(const char *name, GNode *scope, 
 	return var;
 }
 
+/* TODO: Replace these calls with VarFindSubstring, as far as possible. */
+static Var *
+VarFind(const char *name, GNode *scope, bool elsewhere)
+{
+	return VarFindSubstring(Substring_InitStr(name), scope, elsewhere);
+}
+
 /* If the variable is an environment variable, free it, including its value. */
 static void
 VarFreeEnv(Var *v)
@@ -4006,12 +4000,17 @@ cleanup:
 }
 
 /*
- * Only four of the local variables are treated specially as they are the
- * only four that will be set when dynamic sources are expanded.
+ * Only 4 of the 7 local variables are treated specially as they are the only
+ * ones that will be set when dynamic sources are expanded.
  */
 static bool
-VarnameIsDynamic(const char *name, size_t len)
+VarnameIsDynamic(Substring varname)
 {
+	const char *name;
+	size_t len;
+
+	name = varname.start;
+	len = Substring_Length(varname);
 	if (len == 1 || (len == 2 && (name[1] == 'F' || name[1] == 'D'))) {
 		switch (name[0]) {
 		case '@':
@@ -4024,10 +4023,10 @@ VarnameIsDynamic(const char *name, size_
 	}
 
 	if ((len == 7 || len == 8) && name[0] == '.' && ch_isupper(name[1])) {
-		return strcmp(name, ".TARGET") == 0 ||
-		       strcmp(name, ".ARCHIVE") == 0 ||
-		       strcmp(name, ".PREFIX") == 0 ||
-		       strcmp(name, ".MEMBER") == 0;
+		return Substring_Equals(varname, ".TARGET") ||
+		       Substring_Equals(varname, ".ARCHIVE") ||
+		       Substring_Equals(varname, ".PREFIX") ||
+		       Substring_Equals(varname, ".MEMBER");
 	}
 
 	return false;
@@ -4064,16 +4063,15 @@ UndefinedShortVarValue(char varname, con
  * Parse a variable name, until the end character or a colon, whichever
  * comes first.
  */
-static char *
+static void
 ParseVarname(const char **pp, char startc, char endc,
 	     GNode *scope, VarEvalMode emode,
-	     size_t *out_varname_len)
+	     LazyBuf *buf)
 {
-	Buffer buf;
 	const char *p = *pp;
 	int depth = 0;		/* Track depth so we can spot parse errors. */
 
-	Buf_Init(&buf);
+	LazyBuf_Init(buf, Substring_InitStr(p));
 
 	while (*p != '\0') {
 		if ((*p == endc || *p == ':') && depth == 0)
@@ -4088,16 +4086,14 @@ ParseVarname(const char **pp, char start
 			FStr nested_val;
 			(void)Var_Parse(&p, scope, emode, &nested_val);
 			/* TODO: handle errors */
-			Buf_AddStr(&buf, nested_val.str);
+			LazyBuf_AddStr(buf, nested_val.str);
 			FStr_Done(&nested_val);
 		} else {
-			Buf_AddByte(&buf, *p);
+			LazyBuf_Add(buf, *p);
 			p++;
 		}
 	}
 	*pp = p;
-	*out_varname_len = buf.len;
-	return Buf_DoneData(&buf);
 }
 
 static VarParseResult
@@ -4186,65 +4182,52 @@ ParseVarnameShort(char varname, const ch
 
 /* Find variables like @F or <D. */
 static Var *
-FindLocalLegacyVar(const char *varname, size_t namelen, GNode *scope,
+FindLocalLegacyVar(Substring varname, GNode *scope,
 		   const char **out_extraModifiers)
 {
+	Var *v;
+
 	/* Only resolve these variables if scope is a "real" target. */
 	if (scope == SCOPE_CMDLINE || scope == SCOPE_GLOBAL)
 		return NULL;
 
-	if (namelen != 2)
+	if (Substring_Length(varname) != 2)
 		return NULL;
-	if (varname[1] != 'F' && varname[1] != 'D')
+	if (varname.start[1] != 'F' && varname.start[1] != 'D')
 		return NULL;
-	if (strchr("@%?*!<>", varname[0]) == NULL)
+	if (strchr("@%?*!<>", varname.start[0]) == NULL)
 		return NULL;
 
-	{
-		char name[2];
-		Var *v;
+	v = VarFindSubstring(Substring_Sub(varname, 0, 1), scope, false);
+	if (v == NULL)
+		return NULL;
 
-		name[0] = varname[0];
-		name[1] = '\0';
-		v = VarFind(name, scope, false);
-
-		if (v != NULL) {
-			if (varname[1] == 'D') {
-				*out_extraModifiers = "H:";
-			} else { /* F */
-				*out_extraModifiers = "T:";
-			}
-		}
-		return v;
-	}
+	*out_extraModifiers = varname.start[1] == 'D' ? "H:" : "T:";
+	return v;
 }
 
 static VarParseResult
-EvalUndefined(bool dynamic, const char *start, const char *p, char *varname,
-	      VarEvalMode emode,
-	      FStr *out_val)
+EvalUndefined(bool dynamic, const char *start, const char *p,
+	      Substring varname, VarEvalMode emode, FStr *out_val)
 {
 	if (dynamic) {
 		*out_val = FStr_InitOwn(bmake_strsedup(start, p));
-		free(varname);
 		return VPR_OK;
 	}
 
 	if (emode == VARE_UNDEFERR && opts.strict) {
 		Parse_Error(PARSE_FATAL,
-		    "Variable \"%s\" is undefined", varname);
-		free(varname);
+		    "Variable \"%.*s\" is undefined",
+		    (int)Substring_Length(varname), varname.start);
 		*out_val = FStr_InitRefer(var_Error);
 		return VPR_ERR;
 	}
 
 	if (emode == VARE_UNDEFERR) {
-		free(varname);
 		*out_val = FStr_InitRefer(var_Error);
 		return VPR_UNDEF;	/* XXX: Should be VPR_ERR instead. */
 	}
 
-	free(varname);
 	*out_val = FStr_InitRefer(varUndefined);
 	return VPR_OK;
 }
@@ -4275,8 +4258,7 @@ ParseVarnameLong(
 	ExprDefined *out_true_exprDefined
 )
 {
-	size_t namelen;
-	char *varname;
+	LazyBuf varname;
 	Var *v;
 	bool haveModifier;
 	bool dynamic = false;
@@ -4285,28 +4267,30 @@ ParseVarnameLong(
 	char endc = startc == '(' ? ')' : '}';
 
 	p += 2;			/* skip "${" or "$(" or "y(" */
-	varname = ParseVarname(&p, startc, endc, scope, emode, &namelen);
+	ParseVarname(&p, startc, endc, scope, emode, &varname);
 
 	if (*p == ':') {
 		haveModifier = true;
 	} else if (*p == endc) {
 		haveModifier = false;
 	} else {
-		Parse_Error(PARSE_FATAL, "Unclosed variable \"%s\"", varname);
-		free(varname);
+		Substring name = LazyBuf_Get(&varname);
+		Parse_Error(PARSE_FATAL, "Unclosed variable \"%.*s\"",
+		    (int)Substring_Length(name), name.start);
+		LazyBuf_Done(&varname);
 		*out_false_pp = p;
 		*out_false_val = FStr_InitRefer(var_Error);
 		*out_false_res = VPR_ERR;
 		return false;
 	}
 
-	v = VarFind(varname, scope, true);
+	v = VarFindSubstring(LazyBuf_Get(&varname), scope, true);
 
 	/* At this point, p points just after the variable name,
 	 * either at ':' or at endc. */
 
 	if (v == NULL) {
-		v = FindLocalLegacyVar(varname, namelen, scope,
+		v = FindLocalLegacyVar(LazyBuf_Get(&varname), scope,
 		    out_true_extraModifiers);
 	}
 
@@ -4315,14 +4299,14 @@ ParseVarnameLong(
 		 * Defer expansion of dynamic variables if they appear in
 		 * non-local scope since they are not defined there.
 		 */
-		dynamic = VarnameIsDynamic(varname, namelen) &&
+		dynamic = VarnameIsDynamic(LazyBuf_Get(&varname)) &&
 			  (scope == SCOPE_CMDLINE || scope == SCOPE_GLOBAL);
 
 		if (!haveModifier) {
 			p++;	/* skip endc */
 			*out_false_pp = p;
 			*out_false_res = EvalUndefined(dynamic, start, p,
-			    varname, emode, out_false_val);
+			    LazyBuf_Get(&varname), emode, out_false_val);
 			return false;
 		}
 
@@ -4340,10 +4324,10 @@ ParseVarnameLong(
 		 * is still undefined, Var_Parse will return an empty string
 		 * instead of the actually computed value.
 		 */
-		v = VarNew(FStr_InitOwn(varname), "", false, false);
+		v = VarNew(LazyBuf_DoneGet(&varname), "", false, false);
 		*out_true_exprDefined = DEF_UNDEF;
 	} else
-		free(varname);
+		LazyBuf_Done(&varname);
 
 	*out_true_endc = endc;
 	*out_true_p = p;

Reply via email to