Module Name:    src
Committed By:   rillig
Date:           Sun Apr 24 10:36:37 UTC 2022

Modified Files:
        src/distrib/sets/lists/tests: mi
        src/tests/usr.bin/indent: Makefile fmt_decl.c lsym_binary_op.c
            lsym_comment.c lsym_do.c lsym_eof.c lsym_for.c lsym_form_feed.c
            lsym_funcname.c lsym_if.c lsym_newline.c lsym_preprocessing.c
            lsym_rparen_or_rbracket.c lsym_semicolon.c lsym_storage_class.c
            lsym_tag.c lsym_typedef.c lsym_unary_op.c lsym_while.c lsym_word.c
            psym_decl.c psym_do.c psym_do_stmt.c psym_else.c psym_stmt.c
            psym_stmt_list.c
Added Files:
        src/tests/usr.bin/indent: edge_cases.c fmt_init.c
Removed Files:
        src/tests/usr.bin/indent: token_binary_op.c token_comment.c
            token_decl.c token_do_stmt.c token_end_of_file.c token_for_exprs.c
            token_form_feed.c token_funcname.c token_ident.c token_keyword_do.c
            token_keyword_do_else.c token_keyword_else.c
            token_keyword_for_if_while.c token_keyword_struct_union_enum.c
            token_newline.c token_postfix_op.c token_preprocessing.c
            token_rparen.c token_semicolon.c token_stmt.c token_stmt_list.c
            token_storage_class.c token_string_prefix.c token_switch_expr.c
            token_type_def.c token_unary_op.c token_while_expr.c

Log Message:
tests/indent: migrate token tests to other tests

In indent.h 1.49 from 2021-10-25, the enumeration token_type was split
into lexer_symbol and parser_symbol to more clearly express that these
tokens fall into completely different classes of usage patterns.


To generate a diff of this commit:
cvs rdiff -u -r1.1197 -r1.1198 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.44 -r1.45 src/tests/usr.bin/indent/Makefile
cvs rdiff -u -r0 -r1.1 src/tests/usr.bin/indent/edge_cases.c \
    src/tests/usr.bin/indent/fmt_init.c
cvs rdiff -u -r1.35 -r1.36 src/tests/usr.bin/indent/fmt_decl.c
cvs rdiff -u -r1.5 -r1.6 src/tests/usr.bin/indent/lsym_binary_op.c \
    src/tests/usr.bin/indent/lsym_typedef.c \
    src/tests/usr.bin/indent/lsym_word.c
cvs rdiff -u -r1.3 -r1.4 src/tests/usr.bin/indent/lsym_comment.c \
    src/tests/usr.bin/indent/lsym_do.c src/tests/usr.bin/indent/lsym_eof.c \
    src/tests/usr.bin/indent/lsym_for.c \
    src/tests/usr.bin/indent/lsym_form_feed.c \
    src/tests/usr.bin/indent/lsym_funcname.c \
    src/tests/usr.bin/indent/lsym_if.c \
    src/tests/usr.bin/indent/lsym_newline.c \
    src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c \
    src/tests/usr.bin/indent/lsym_semicolon.c \
    src/tests/usr.bin/indent/lsym_storage_class.c \
    src/tests/usr.bin/indent/lsym_while.c \
    src/tests/usr.bin/indent/psym_decl.c src/tests/usr.bin/indent/psym_do.c \
    src/tests/usr.bin/indent/psym_do_stmt.c \
    src/tests/usr.bin/indent/psym_else.c src/tests/usr.bin/indent/psym_stmt.c \
    src/tests/usr.bin/indent/psym_stmt_list.c
cvs rdiff -u -r1.4 -r1.5 src/tests/usr.bin/indent/lsym_preprocessing.c \
    src/tests/usr.bin/indent/lsym_tag.c \
    src/tests/usr.bin/indent/lsym_unary_op.c
cvs rdiff -u -r1.13 -r0 src/tests/usr.bin/indent/token_binary_op.c
cvs rdiff -u -r1.30 -r0 src/tests/usr.bin/indent/token_comment.c
cvs rdiff -u -r1.3 -r0 src/tests/usr.bin/indent/token_decl.c \
    src/tests/usr.bin/indent/token_do_stmt.c \
    src/tests/usr.bin/indent/token_end_of_file.c \
    src/tests/usr.bin/indent/token_funcname.c \
    src/tests/usr.bin/indent/token_keyword_do.c \
    src/tests/usr.bin/indent/token_keyword_do_else.c \
    src/tests/usr.bin/indent/token_keyword_else.c \
    src/tests/usr.bin/indent/token_keyword_for_if_while.c \
    src/tests/usr.bin/indent/token_postfix_op.c \
    src/tests/usr.bin/indent/token_rparen.c \
    src/tests/usr.bin/indent/token_stmt.c \
    src/tests/usr.bin/indent/token_stmt_list.c \
    src/tests/usr.bin/indent/token_storage_class.c \
    src/tests/usr.bin/indent/token_switch_expr.c \
    src/tests/usr.bin/indent/token_type_def.c
cvs rdiff -u -r1.4 -r0 src/tests/usr.bin/indent/token_for_exprs.c \
    src/tests/usr.bin/indent/token_newline.c \
    src/tests/usr.bin/indent/token_preprocessing.c \
    src/tests/usr.bin/indent/token_semicolon.c \
    src/tests/usr.bin/indent/token_unary_op.c \
    src/tests/usr.bin/indent/token_while_expr.c
cvs rdiff -u -r1.5 -r0 src/tests/usr.bin/indent/token_form_feed.c \
    src/tests/usr.bin/indent/token_keyword_struct_union_enum.c \
    src/tests/usr.bin/indent/token_string_prefix.c
cvs rdiff -u -r1.7 -r0 src/tests/usr.bin/indent/token_ident.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.1197 src/distrib/sets/lists/tests/mi:1.1198
--- src/distrib/sets/lists/tests/mi:1.1197	Sun Apr 24 08:48:17 2022
+++ src/distrib/sets/lists/tests/mi	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1197 2022/04/24 08:48:17 rillig Exp $
+# $NetBSD: mi,v 1.1198 2022/04/24 10:36:37 rillig Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -4771,6 +4771,7 @@
 ./usr/tests/usr.bin/indent/declarations.0				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/declarations.0.stderr			tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/declarations.0.stdout			tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/edge_cases.c					tests-usr.bin-tests	compattestfile,atf
 ./usr/tests/usr.bin/indent/elsecomment.0				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/elsecomment.0.pro				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/elsecomment.0.stdout				tests-obsolete		obsolete,atf
@@ -4782,6 +4783,7 @@
 ./usr/tests/usr.bin/indent/fmt_decl.c					tests-usr.bin-tests	compattestfile,atf
 ./usr/tests/usr.bin/indent/fmt_else_comment.c				tests-usr.bin-tests	compattestfile,atf
 ./usr/tests/usr.bin/indent/fmt_expr.c					tests-usr.bin-tests	compattestfile,atf
+./usr/tests/usr.bin/indent/fmt_init.c					tests-usr.bin-tests	compattestfile,atf
 ./usr/tests/usr.bin/indent/indent_off_on.c				tests-usr.bin-tests	compattestfile,atf
 ./usr/tests/usr.bin/indent/indent_variables.0				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/indent_variables.0.pro			tests-obsolete		obsolete,atf
@@ -5253,44 +5255,44 @@
 ./usr/tests/usr.bin/indent/token-while_expr.0				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token-while_expr.0.pro			tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token-while_expr.0.stdout			tests-obsolete		obsolete,atf
-./usr/tests/usr.bin/indent/token_binary_op.c				tests-usr.bin-tests	compattestfile,atf
+./usr/tests/usr.bin/indent/token_binary_op.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_case_label.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_colon.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_comma.c				tests-obsolete		obsolete,atf
-./usr/tests/usr.bin/indent/token_comment.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_decl.c					tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_do_stmt.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_end_of_file.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_for_exprs.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_form_feed.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_funcname.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_ident.c				tests-usr.bin-tests	compattestfile,atf
+./usr/tests/usr.bin/indent/token_comment.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_decl.c					tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_do_stmt.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_end_of_file.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_for_exprs.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_form_feed.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_funcname.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_ident.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_if_expr.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_if_expr_stmt.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_if_expr_stmt_else.c			tests-obsolete		obsolete,atf
-./usr/tests/usr.bin/indent/token_keyword_do.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_do_else.c			tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_else.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_for_if_while.c			tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_struct_union_enum.c		tests-usr.bin-tests	compattestfile,atf
+./usr/tests/usr.bin/indent/token_keyword_do.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_do_else.c			tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_else.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_for_if_while.c			tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_struct_union_enum.c		tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_lbrace.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_lparen.c				tests-obsolete		obsolete,atf
-./usr/tests/usr.bin/indent/token_newline.c				tests-usr.bin-tests	compattestfile,atf
+./usr/tests/usr.bin/indent/token_newline.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_period.c				tests-obsolete		obsolete,atf
-./usr/tests/usr.bin/indent/token_postfix_op.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_preprocessing.c			tests-usr.bin-tests	compattestfile,atf
+./usr/tests/usr.bin/indent/token_postfix_op.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_preprocessing.c			tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_question.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/token_rbrace.c				tests-obsolete		obsolete,atf
-./usr/tests/usr.bin/indent/token_rparen.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_semicolon.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_stmt.c					tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_stmt_list.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_storage_class.c			tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_string_prefix.c			tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_switch_expr.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_type_def.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_unary_op.c				tests-usr.bin-tests	compattestfile,atf
-./usr/tests/usr.bin/indent/token_while_expr.c				tests-usr.bin-tests	compattestfile,atf
+./usr/tests/usr.bin/indent/token_rparen.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_semicolon.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_stmt.c					tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_stmt_list.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_storage_class.c			tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_string_prefix.c			tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_switch_expr.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_type_def.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_unary_op.c				tests-obsolete		obsolete,atf
+./usr/tests/usr.bin/indent/token_while_expr.c				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/types_from_file.0				tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/types_from_file.0.list			tests-obsolete		obsolete,atf
 ./usr/tests/usr.bin/indent/types_from_file.0.pro			tests-obsolete		obsolete,atf

Index: src/tests/usr.bin/indent/Makefile
diff -u src/tests/usr.bin/indent/Makefile:1.44 src/tests/usr.bin/indent/Makefile:1.45
--- src/tests/usr.bin/indent/Makefile:1.44	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/Makefile	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.44 2022/04/24 09:04:12 rillig Exp $
+#	$NetBSD: Makefile,v 1.45 2022/04/24 10:36:37 rillig Exp $
 
 .include <bsd.own.mk>
 
@@ -8,10 +8,12 @@ TESTS_SH+=	t_misc
 TESTS_SH+=	t_options
 
 FILESDIR=	${TESTSDIR}
+FILES+=		edge_cases.c
 FILES+=		fmt_block.c
 FILES+=		fmt_decl.c
 FILES+=		fmt_else_comment.c
 FILES+=		fmt_expr.c
+FILES+=		fmt_init.c
 FILES+=		indent_off_on.c
 FILES+=		label.c
 FILES+=		lex_char.c
@@ -110,33 +112,6 @@ FILES+=		psym_stmt_list.c
 FILES+=		psym_switch_expr.c
 FILES+=		psym_while_expr.c
 FILES+=		t_options.awk
-FILES+=		token_binary_op.c
-FILES+=		token_comment.c
-FILES+=		token_decl.c
-FILES+=		token_do_stmt.c
-FILES+=		token_end_of_file.c
-FILES+=		token_for_exprs.c
-FILES+=		token_form_feed.c
-FILES+=		token_funcname.c
-FILES+=		token_ident.c
-FILES+=		token_keyword_do.c
-FILES+=		token_keyword_do_else.c
-FILES+=		token_keyword_else.c
-FILES+=		token_keyword_for_if_while.c
-FILES+=		token_keyword_struct_union_enum.c
-FILES+=		token_newline.c
-FILES+=		token_postfix_op.c
-FILES+=		token_preprocessing.c
-FILES+=		token_rparen.c
-FILES+=		token_semicolon.c
-FILES+=		token_stmt.c
-FILES+=		token_stmt_list.c
-FILES+=		token_storage_class.c
-FILES+=		token_string_prefix.c
-FILES+=		token_switch_expr.c
-FILES+=		token_type_def.c
-FILES+=		token_unary_op.c
-FILES+=		token_while_expr.c
 
 add-test: .PHONY
 	@set -eu; \

Index: src/tests/usr.bin/indent/fmt_decl.c
diff -u src/tests/usr.bin/indent/fmt_decl.c:1.35 src/tests/usr.bin/indent/fmt_decl.c:1.36
--- src/tests/usr.bin/indent/fmt_decl.c:1.35	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/fmt_decl.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: fmt_decl.c,v 1.35 2022/04/24 09:04:12 rillig Exp $	*/
+/*	$NetBSD: fmt_decl.c,v 1.36 2022/04/24 10:36:37 rillig Exp $	*/
 
 /*
  * Tests for declarations of global variables, external functions, and local
@@ -884,3 +884,24 @@ a(char *fe)
 {
 }
 //indent end
+
+
+/*
+ * Before NetBSD indent.c 1.178 from 2021-10-29, indent removed the blank
+ * before the '=' in the second and third of these function pointer
+ * declarations. This was because indent interpreted the prototype parameters
+ * 'int' and 'int, int' as type casts, which doesn't make sense at all. Fixing
+ * this properly requires large style changes since indent is based on simple
+ * heuristics all over. This didn't change in indent.c 1.178; instead, the
+ * rule for inserting a blank before a binary operator was changed to always
+ * insert a blank, except at the beginning of a line.
+ */
+//indent input
+char *(*fn)() = NULL;
+char *(*fn)(int) = NULL;
+char *(*fn)(int, int) = NULL;
+//indent end
+
+/* XXX: The parameter '(int)' is wrongly interpreted as a type cast. */
+/* XXX: The parameter '(int, int)' is wrongly interpreted as a type cast. */
+//indent run-equals-input -di0

Index: src/tests/usr.bin/indent/lsym_binary_op.c
diff -u src/tests/usr.bin/indent/lsym_binary_op.c:1.5 src/tests/usr.bin/indent/lsym_binary_op.c:1.6
--- src/tests/usr.bin/indent/lsym_binary_op.c:1.5	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_binary_op.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_binary_op.c,v 1.5 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_binary_op.c,v 1.6 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_binary_op, which represents a binary operator in
@@ -74,3 +74,89 @@ int var = expr**ptr;
 //indent run -di0
 int var = expr * *ptr;
 //indent end
+
+
+/*
+ * When indent tokenizes some operators, it allows for
+ * arbitrary repetitions of the operator character, followed by an
+ * arbitrary number of '='.  This is used for operators like '&&' or
+ * '|||==='.
+ *
+ * Before 2021-03-07 22:11:01, the comment '//' was treated as an
+ * operator as well, and so was the comment '/////', leading to
+ * unexpected results.
+ *
+ * See lexi.c, lexi, "default:".
+ */
+//indent input
+void
+long_run_of_operators(void)
+{
+	if (a &&&&&&& b)
+		return;
+	if (a |||=== b)
+		return;
+}
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Long chains of '+' and '-' must be split into several operators as the
+ * lexer has to distinguish between '++' and '+' early.  The following
+ * sequence is thus tokenized as:
+ *
+ *	word		"a"
+ *	postfix_op	"++"
+ *	binary_op	"++"
+ *	unary_op	"++"
+ *	unary_op	"+"
+ *	word		"b"
+ *
+ * See lexi.c, lexi, "case '+':".
+ */
+//indent input
+void
+joined_unary_and_binary_operators(void)
+{
+	if (a +++++++ b)
+		return;
+}
+//indent end
+
+//indent run
+void
+joined_unary_and_binary_operators(void)
+{
+	if (a++ ++ ++ +b)
+		return;
+}
+//indent end
+
+
+/*
+ * Ensure that the result of the indentation does not depend on whether a
+ * token from the input starts in column 1 or 9.
+ *
+ * See process_binary_op, ps.curr_col_1.
+ */
+//indent input
+int col_1 //
+= //
+1;
+
+int col_9 //
+	= //
+	9;
+//indent end
+
+//indent run
+int		col_1		//
+=				//
+1;
+
+int		col_9		//
+=				//
+9;
+//indent end
Index: src/tests/usr.bin/indent/lsym_typedef.c
diff -u src/tests/usr.bin/indent/lsym_typedef.c:1.5 src/tests/usr.bin/indent/lsym_typedef.c:1.6
--- src/tests/usr.bin/indent/lsym_typedef.c:1.5	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_typedef.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_typedef.c,v 1.5 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_typedef.c,v 1.6 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_typedef, which represents the keyword 'typedef'
@@ -48,3 +48,14 @@ enum {
 	EC2
 }		E;
 //indent end
+
+
+/*
+ * Contrary to declarations, type definitions are not affected by the option
+ * '-di'.
+ */
+//indent input
+typedef int number;
+//indent end
+
+//indent run-equals-input
Index: src/tests/usr.bin/indent/lsym_word.c
diff -u src/tests/usr.bin/indent/lsym_word.c:1.5 src/tests/usr.bin/indent/lsym_word.c:1.6
--- src/tests/usr.bin/indent/lsym_word.c:1.5	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_word.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_word.c,v 1.5 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_word.c,v 1.6 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_word, which represents a constant, a string
@@ -11,6 +11,8 @@
 // TODO: Is '"string"(' syntactically valid in any context?
 // TODO: Is '123(' syntactically valid in any context?
 // TODO: Would the output of the above depend on -pcs/-npcs?
+// TODO: Add more systematic tests.
+// TODO: Completely cover each state transition in lex_number_state.
 
 //indent input
 // TODO: add input
@@ -32,3 +34,114 @@ int var\
 //indent run
 int		var + name = 4;
 //indent end
+
+
+//indent input
+wchar_t wide_string[] = L"wide string";
+//indent end
+
+/*
+ * Regardless of the line length, the 'L' must never be separated from the
+ * string literal.  Before lexi.c 1.167 from 2021-11-28, the 'L' was a
+ * separate token, which could have resulted in accidental spacing between the
+ * 'L' and the following "".
+ */
+//indent run-equals-input -di0
+
+//indent run-equals-input -di0 -l25
+
+//indent run-equals-input -di0 -l1
+
+
+//indent input
+wchar_t wide_char[] = L'w';
+//indent end
+
+//indent run-equals-input -di0
+
+
+/* Binary number literals, a GCC extension that was standardized in C23. */
+//indent input
+#define b00101010 -1
+void t(void) {
+	unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
+	float x[] = {.7f, 0.7f};
+	unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
+
+	if (0 b00101010)
+		return;
+	/* $ '0r' is not a number base prefix, so the tokens are split. */
+	if (0r12345)
+		return;
+}
+//indent end
+
+//indent run
+#define b00101010 -1
+void
+t(void)
+{
+	unsigned	a[] = {0b00101010, 0x00005678, 02, 17U};
+	float		x[] = {.7f, 0.7f};
+	unsigned long	ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
+
+	if (0 b00101010)
+		return;
+	if (0 r12345)
+		return;
+}
+//indent end
+
+
+/* Floating point numbers. */
+//indent input
+void t(void) {
+	unsigned long x = 314UL;
+	double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
+	int z = 0b0101;
+	DO_NOTHING;
+	x._y = 5;
+}
+//indent end
+
+//indent run
+void
+t(void)
+{
+	unsigned long	x = 314UL;
+	double		y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
+	int		z = 0b0101;
+	DO_NOTHING;
+	x._y = 5;
+}
+//indent end
+
+
+/*
+ * Test identifiers containing '$', which some compilers support as an
+ * extension to the C standard.
+ */
+//indent input
+int $		= jQuery;			// just kidding
+const char SYS$LOGIN[]="$HOME";
+//indent end
+
+//indent run
+int		$ = jQuery;	// just kidding
+const char	SYS$LOGIN[] = "$HOME";
+//indent end
+
+
+/*
+ * Test the tokenizer for number constants.
+ *
+ * When the tokenizer has read a prefix that is not a valid token by itself
+ * (such as '0x') but could be extended to form a valid token (such as
+ * '0x123'), indent does not care and returns the invalid prefix nevertheless.
+ */
+//indent input
+int unfinished_hex_prefix = 0x;
+double unfinished_hex_float = 0x123p;
+//indent end
+
+//indent run-equals-input -di0

Index: src/tests/usr.bin/indent/lsym_comment.c
diff -u src/tests/usr.bin/indent/lsym_comment.c:1.3 src/tests/usr.bin/indent/lsym_comment.c:1.4
--- src/tests/usr.bin/indent/lsym_comment.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_comment.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_comment.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_comment.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_comment, which starts a comment.
@@ -11,8 +11,1085 @@
  *	token_comment.c
  */
 
+/*-
+ * TODO: systematically test comments
+ *
+ * - starting in column 1, with opt.format_col1_comments (-fc1)
+ * - starting in column 1, without opt.format_col1_comments (-nfc1)
+ * - starting in column 9, independent of opt.format_col1_comments (-fc1)
+ * - starting in column 33, the default
+ * - starting in column 65, which is already close to the default right margin
+ * - starting in column 81, spilling into the right margin
+ *
+ * - block comment starting with '/' '*' '-'
+ * - block comment starting with '/' '*' '*'
+ * - block comment starting with '/' '*' '\n'
+ * - end-of-line comment starting with '//'
+ * - end-of-line comment starting with '//x', so without leading space
+ * - block comment starting with '/' '*' 'x', so without leading space
+ *
+ * - block/end-of-line comment to the right of a label
+ * - block/end-of-line comment to the right of code
+ * - block/end-of-line comment to the right of label with code
+ *
+ * - with/without opt.comment_delimiter_on_blankline (-cdb)
+ * - with/without opt.star_comment_cont (-sc)
+ * - with/without opt.format_block_comments (-fbc)
+ * - with varying opt.max_line_length (32, 64, 80, 140)
+ * - with varying opt.unindent_displace (-d0, -d2, -d-5)
+ * - with varying opt.indent_size (3, 4, 8)
+ * - with varying opt.tabsize (3, 4, 8, 16)
+ * - with varying opt.block_comment_max_line_length (-lc60, -lc78, -lc90)
+ * - with varying opt.comment_column (-c0, -c1, -c33, -c80)
+ * - with varying opt.decl_comment_column (-cd0, -cd1, -cd20, -cd33, -cd80)
+ * - with/without ps.decl_on_line
+ * - with/without ps.next_col_1
+ *
+ * - very long comments that overflow the buffer 'com'
+ * - comments that come from save_com
+ * - very long word that already spills over the right margin
+ * - wrap/nowrap comment containing '\n'
+ * - wrap/nowrap comment containing '\f'
+ * - wrap/nowrap comment containing '\t'
+ * - wrap/nowrap comment containing '\b'
+ */
+
+//indent input
+typedef enum x {
+	aaaaaaaaaaaaaaaaaaaaaa = 1 << 0,	/* test a */
+	bbbbbbbbbbbbbbbbb = 1 << 1,	/* test b */
+	cccccccccccccc = 1 << 1,	/* test c */
+	dddddddddddddddddddddddddddddd = 1 << 2	/* test d */
+} x;
+//indent end
+
+//indent run-equals-input -bbb
+
+
+//indent input
+/* See FreeBSD r303597, r303598, r309219, and r309343 */
+void
+t(void) {
+	/*
+	 * Old indent wrapped the URL near where this sentence ends.
+	 *
+	 * https://www.freebsd.org/cgi/man.cgi?query=indent&apropos=0&sektion=0&manpath=FreeBSD+12-current&arch=default&format=html
+	 */
+
+	/*
+	 * The default maximum line length for comments is 78, and the 'kk' at
+	 * the end makes the line exactly 78 bytes long.
+	 *
+	 * aaaaaa bbbbbb cccccc dddddd eeeeee ffffff ggggg hhhhh iiiii jjjj kk
+	 */
+
+	/*
+	 * Old indent unnecessarily removed the star comment continuation on the next line.
+	 *
+	 * *test*
+	 */
+
+	/* r309219 Go through linked list, freeing from the malloced (t[-1]) address. */
+
+	/* r309343	*/
+}
+//indent end
+
+//indent run -bbb
+/* See FreeBSD r303597, r303598, r309219, and r309343 */
+void
+t(void)
+{
+	/*
+	 * Old indent wrapped the URL near where this sentence ends.
+	 *
+	 * https://www.freebsd.org/cgi/man.cgi?query=indent&apropos=0&sektion=0&manpath=FreeBSD+12-current&arch=default&format=html
+	 */
+
+	/*
+	 * The default maximum line length for comments is 78, and the 'kk' at
+	 * the end makes the line exactly 78 bytes long.
+	 *
+	 * aaaaaa bbbbbb cccccc dddddd eeeeee ffffff ggggg hhhhh iiiii jjjj kk
+	 */
+
+	/*
+	 * Old indent unnecessarily removed the star comment continuation on
+	 * the next line.
+	 *
+	 * *test*
+	 */
+
+	/*
+	 * r309219 Go through linked list, freeing from the malloced (t[-1])
+	 * address.
+	 */
+
+	/* r309343	*/
+}
+//indent end
+
+
+/*
+ * The first Christmas tree is to the right of the code, therefore the comment
+ * is moved to the code comment column; the follow-up lines of that comment
+ * are moved by the same distance, to preserve the internal layout.
+ *
+ * The other Christmas tree is a standalone block comment, therefore the
+ * comment starts in the code column.
+ *
+ * Since the comments occur between psym_if_expr and the following statement,
+ * they are handled by search_stmt_comment.
+ */
+//indent input
+{
+	if (1) /*- a Christmas tree  *  search_stmt_comment
+				    ***
+				   ***** */
+		    /*- another one *  search_stmt_comment
+				   ***
+				  ***** */
+		1;
+}
+//indent end
+
+//indent run -bbb
+{
+	if (1)			/*- a Christmas tree  *  search_stmt_comment
+						     ***
+						    ***** */
+		/*- another one *  search_stmt_comment
+			       ***
+			      ***** */
+		1;
+}
+//indent end
+
+
+/*
+ * The first Christmas tree is to the right of the code, therefore the comment
+ * is moved to the code comment column; the follow-up lines of that comment
+ * are moved by the same distance, to preserve the internal layout.
+ *
+ * The other Christmas tree is a standalone block comment, therefore the
+ * comment starts in the code column.
+ */
+//indent input
+{
+	if (7) { /*- a Christmas tree  *
+				      ***
+				     ***** */
+		    /*- another one *
+				   ***
+				  ***** */
+		stmt();
+	}
+}
+//indent end
+
+//indent run -bbb
+{
+	if (7) {		/*- a Christmas tree  *
+					             ***
+					            ***** */
+		/*- another one *
+			       ***
+			      ***** */
+		stmt();
+	}
+}
+//indent end
+
+
+//indent input
+int decl;/*-fixed comment
+	    fixed comment*/
+//indent end
+
+//indent run -di0
+int decl;			/*-fixed comment
+			           fixed comment*/
+//indent end
+/*
+ * XXX: The second line of the above comment contains 11 spaces in a row,
+ * instead of using as many tabs as possible.
+ */
+
+
+//indent input
+{
+	if (0)/*-search_stmt_comment   |
+	   search_stmt_comment         |*/
+		;
+}
+//indent end
+
+//indent run -di0
+{
+	if (0)			/*-search_stmt_comment   |
+			     search_stmt_comment         |*/
+		;
+}
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the right.
+ */
+//indent input
+int decl;/*-fixed comment
+123456789ab fixed comment*/
+//indent end
+
+//indent run -di0
+int decl;			/*-fixed comment
+		       123456789ab fixed comment*/
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the right.
+ *
+ * This comment is handled by search_stmt_comment.
+ */
+//indent input
+{
+	if(0)/*-search_stmt_comment
+123456789ab search_stmt_comment   |*/
+	    ;
+}
+//indent end
+
+//indent run -di0
+{
+	if (0)			/*-search_stmt_comment
+		   123456789ab search_stmt_comment   |*/
+		;
+}
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the left. In this case, the internal layout of the comment cannot be
+ * preserved since the second line already starts in column 1.
+ */
+//indent input
+int decl;					    /*-|fixed comment
+					| minus 12     |
+		| tabs inside		|
+	    |---|
+|-----------|
+tab1+++	tab2---	tab3+++	tab4---	tab5+++	tab6---	tab7+++fixed comment*/
+//indent end
+
+//indent run -di0
+int decl;			/*-|fixed comment
+		    | minus 12     |
+| tabs inside		|
+|---|
+|-----------|
+tab1+++	tab2---	tab3+++	tab4---	tab5+++	tab6---	tab7+++fixed comment*/
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the left. In this case, the internal layout of the comment cannot be
+ * preserved since the second line already starts in column 1.
+ *
+ * This comment is processed by search_stmt_comment.
+ */
+//indent input
+{
+	if(0)					    /*-|search_stmt_comment
+					| minus 12     |
+		| tabs inside		|
+	    |---|
+|-----------|
+tab1+++	tab2---	tab3+++	tab4---	tab5+++	tab6---	tab7+++fixed comment*/
+		;
+}
+//indent end
+
+//indent run -di0
+{
+	if (0)			/*-|search_stmt_comment
+		    | minus 12     |
+| tabs inside		|
+|---|
+|-----------|
+tab1+++	tab2---	tab3+++	tab4---	tab5+++	tab6---	tab7+++fixed comment*/
+		;
+}
+//indent end
+
+
+/*
+ * Ensure that '{' after a search_stmt_comment is preserved.
+ */
+//indent input
+{
+	if(0)/*comment*/{
+	}
+}
+//indent end
+
+/* The comment in the output has moved to the right of the '{'. */
+//indent run
+{
+	if (0) {		/* comment */
+	}
+}
+//indent end
+
+
+/*
+ * The following comments test line breaking when the comment ends with a
+ * space.
+ */
+//indent input
+/* 456789 123456789 123456789 12345 */
+/* 456789 123456789 123456789 123456 */
+/* 456789 123456789 123456789 1234567 */
+/* 456789 123456789 123456789 12345678 */
+/* 456789 123456789 123456789 123456789 */
+//indent end
+
+//indent run -l38
+/* 456789 123456789 123456789 12345 */
+/*
+ * 456789 123456789 123456789 123456
+ */
+/*
+ * 456789 123456789 123456789 1234567
+ */
+/*
+ * 456789 123456789 123456789 12345678
+ */
+/*
+ * 456789 123456789 123456789
+ * 123456789
+ */
+//indent end
+
+
+/*
+ * The following comments test line breaking when the comment does not end
+ * with a space. Since indent adds a trailing space to a single-line comment,
+ * this space has to be taken into account when computing the line length.
+ */
+//indent input
+/* x		. line length 35*/
+/* x		.. line length 36*/
+/* x		... line length 37*/
+/* x		.... line length 38*/
+/* x		..... line length 39*/
+/* x		...... line length 40*/
+/* x		....... line length 41*/
+/* x		........ line length 42*/
+//indent end
+
+//indent run -l38
+/* x		. line length 35 */
+/* x		.. line length 36 */
+/* x		... line length 37 */
+/* x		.... line length 38 */
+/*
+ * x		..... line length 39
+ */
+/*
+ * x		...... line length 40
+ */
+/*
+ * x		....... line length 41
+ */
+/*
+ * x		........ line length 42
+ */
+//indent end
+
+
+/*
+ * The different types of comments that indent distinguishes, starting in
+ * column 1 (see options '-fc1' and '-nfc1').
+ */
+//indent input
+/* This is a traditional C block comment. */
+
+// This is a C99 line comment.
+
+/*
+ * This is a box comment since its first line (the one above this line) is
+ * empty.
+ *
+ *
+ *
+ * Its text gets wrapped.
+ * Empty lines serve as paragraphs.
+ */
+
+/**
+ * This is a box comment
+ * that is not re-wrapped.
+ */
+
+/*-
+ * This is a box comment
+ * that is not re-wrapped.
+ * It is often used for copyright declarations.
+ */
+//indent end
+
+//indent run
+/* This is a traditional C block comment. */
+
+// This is a C99 line comment.
+
+/*
+ * This is a box comment since its first line (the one above this line) is
+ * empty.
+ *
+ *
+ *
+ * Its text gets wrapped. Empty lines serve as paragraphs.
+ */
+
+/**
+ * This is a box comment
+ * that is not re-wrapped.
+ */
+
+/*-
+ * This is a box comment
+ * that is not re-wrapped.
+ * It is often used for copyright declarations.
+ */
+//indent end
+
+
+/*
+ * The different types of comments that indent distinguishes, starting in
+ * column 9, so they are independent of the option '-fc1'.
+ */
+//indent input
+void
+function(void)
+{
+	/* This is a traditional C block comment. */
+
+	/*
+	 * This is a box comment.
+	 *
+	 * It starts in column 9, not 1,
+	 * therefore it gets re-wrapped.
+	 */
+
+	/**
+	 * This is a box comment
+	 * that is not re-wrapped, even though it starts in column 9, not 1.
+	 */
+
+	/*-
+	 * This is a box comment
+	 * that is not re-wrapped.
+	 */
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+	/* This is a traditional C block comment. */
+
+	/*
+	 * This is a box comment.
+	 *
+	 * It starts in column 9, not 1, therefore it gets re-wrapped.
+	 */
+
+	/**
+	 * This is a box comment
+	 * that is not re-wrapped, even though it starts in column 9, not 1.
+	 */
+
+	/*-
+	 * This is a box comment
+	 * that is not re-wrapped.
+	 */
+}
+//indent end
+
+
+/*
+ * Comments to the right of declarations.
+ */
+//indent input
+void
+function(void)
+{
+	int decl;	/* declaration comment */
+
+	int decl;	/* short
+			 * multi-line
+			 * declaration
+			 * comment */
+
+	int decl;	/* long single-line declaration comment that is longer than the allowed line width */
+
+	int decl;	/* long multi-line declaration comment
+ * that is longer than
+ * the allowed line width */
+
+	int decl;	// C99 declaration comment
+
+	{
+		int decl;	/* indented declaration */
+		{
+			int decl;	/* indented declaration */
+			{
+				int decl;	/* indented declaration */
+				{
+					int decl;	/* indented declaration */
+				}
+			}
+		}
+	}
+}
+//indent end
+
+//indent run -ldi0
+void
+function(void)
+{
+	int decl;		/* declaration comment */
+
+	int decl;		/* short multi-line declaration comment */
+
+	int decl;		/* long single-line declaration comment that
+				 * is longer than the allowed line width */
+
+	int decl;		/* long multi-line declaration comment that is
+				 * longer than the allowed line width */
+
+	int decl;		// C99 declaration comment
+
+	{
+		int decl;	/* indented declaration */
+		{
+			int decl;	/* indented declaration */
+			{
+				int decl;	/* indented declaration */
+				{
+					int decl;	/* indented declaration */
+				}
+			}
+		}
+	}
+}
+//indent end
+
+
+/*
+ * Comments to the right of code.
+ */
+//indent input
+void
+function(void)
+{
+	code();			/* code comment */
+	code();			/* code comment _________ to line length 78 */
+	code();			/* code comment __________ to line length 79 */
+	code();			/* code comment ___________ to line length 80 */
+	code();			/* code comment ____________ to line length 81 */
+	code();			/* code comment _____________ to line length 82 */
+
+/* $ In the following comments, the line length is measured after formatting. */
+	code();			/* code comment _________ to line length 78*/
+	code();			/* code comment __________ to line length 79*/
+	code();			/* code comment ___________ to line length 80*/
+	code();			/* code comment ____________ to line length 81*/
+	code();			/* code comment _____________ to line length 82*/
+
+	code();			/* short
+				 * multi-line
+				 * code
+				 * comment */
+
+	code();			/* long single-line code comment that is longer than the allowed line width */
+
+	code();			/* long multi-line code comment
+ * that is longer than
+ * the allowed line width */
+
+	code();			// C99 code comment
+	code();			// C99 code comment ________ to line length 78
+	code();			// C99 code comment _________ to line length 79
+	code();			// C99 code comment __________ to line length 80
+	code();			// C99 code comment ___________ to line length 81
+	code();			// C99 code comment ____________ to line length 82
+
+	if (cond) /* comment */
+		if (cond) /* comment */
+			if (cond) /* comment */
+				if (cond) /* comment */
+					if (cond) /* comment */
+						code(); /* comment */
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+	code();			/* code comment */
+	code();			/* code comment _________ to line length 78 */
+	code();			/* code comment __________ to line length 79 */
+	code();			/* code comment ___________ to line length 80 */
+	code();			/* code comment ____________ to line length 81 */
+	code();			/* code comment _____________ to line length
+				 * 82 */
+
+/* $ In the following comments, the line length is measured after formatting. */
+	code();			/* code comment _________ to line length 78 */
+	code();			/* code comment __________ to line length 79 */
+	code();			/* code comment ___________ to line length 80 */
+	code();			/* code comment ____________ to line length 81 */
+	code();			/* code comment _____________ to line length
+				 * 82 */
+
+	code();			/* short multi-line code comment */
+
+	code();			/* long single-line code comment that is
+				 * longer than the allowed line width */
+
+	code();			/* long multi-line code comment that is longer
+				 * than the allowed line width */
+
+/* $ Trailing C99 comments are not wrapped, as indent would not correctly */
+/* $ recognize the continuation lines as continued comments. For block */
+/* $ comments this works since the comment has not ended yet. */
+	code();			// C99 code comment
+	code();			// C99 code comment ________ to line length 78
+	code();			// C99 code comment _________ to line length 79
+	code();			// C99 code comment __________ to line length 80
+	code();			// C99 code comment ___________ to line length 81
+	code();			// C99 code comment ____________ to line length 82
+
+	if (cond)		/* comment */
+		if (cond)	/* comment */
+			if (cond)	/* comment */
+				if (cond)	/* comment */
+					if (cond)	/* comment */
+						code();	/* comment */
+}
+//indent end
+
+
+//indent input
+/*
+	 * this
+		 * is a boxed
+			 * staircase.
+*
+* Its paragraphs get wrapped.
+
+There may also be
+		lines without asterisks.
+
+ */
+//indent end
+
+//indent run
+/*
+ * this is a boxed staircase.
+ *
+ * Its paragraphs get wrapped.
+ *
+ * There may also be lines without asterisks.
+ *
+ */
+//indent end
+
+
+//indent input
+void loop(void)
+{
+while(cond)/*comment*/;
+
+	while(cond)
+	/*comment*/;
+}
+//indent end
+
+//indent run
+void
+loop(void)
+{
+	while (cond)		/* comment */
+		;
+
+	while (cond)
+/* $ XXX: The spaces around the comment look unintentional. */
+		 /* comment */ ;
+}
+//indent end
+
+
+/*
+ * The following comment starts really far to the right. To avoid that each
+ * line only contains a single word, the maximum allowed line width is
+ * extended such that each comment line may contain 22 characters.
+ */
+//indent input
+int		global_variable_with_really_long_name_that_reaches_up_to_column_83;	/* 1234567890123456789 1 1234567890123456789 12 1234567890123456789 123 1234567890123456789 1234 1234567890123456789 12345 1234567890123456789 123456 */
+//indent end
+
+//indent run
+int		global_variable_with_really_long_name_that_reaches_up_to_column_83;	/* 1234567890123456789 1
+											 * 1234567890123456789 12
+											 * 1234567890123456789
+											 * 123
+											 * 1234567890123456789
+											 * 1234
+											 * 1234567890123456789
+											 * 12345
+											 * 1234567890123456789
+											 * 123456 */
+//indent end
+
+
+/*
+ * Demonstrates handling of line-end '//' comments.
+ *
+ * Even though this type of comment was added in C99, indent didn't
+ * support these comments until 2021 and instead messed up the code in
+ * seemingly unpredictable ways. It treated any sequence of '/' as a binary
+ * operator, no matter whether it was '/' or '//' or '/////'.
+ */
+//indent input
+int dummy // comment
+    = // eq
+    1		// one
+    + // plus
+    2;// two
+
+/////separator/////
+
+void function(void){}
+
+// Note: removing one of these line-end comments affected the formatting
+// of the main function below, before indent supported '//' comments.
+
+int
+main(void)
+{
+}
+//indent end
+
+//indent run
+int		dummy		// comment
+=				// eq
+1				// one
++				// plus
+2;				// two
+
+/////separator/////
+
+void
+function(void)
+{
+}
+
+// Note: removing one of these line-end comments affected the formatting
+// of the main function below, before indent supported '//' comments.
+
+int
+main(void)
+{
+}
+//indent end
+
+
+/*
+ * Between March 2021 and October 2021, indent supported C99 comments only
+ * very basically. It messed up the following code, repeating the identifier
+ * 'bar' twice in a row.
+ */
+//indent input
+void c99_comment(void)
+{
+foo(); // C99 comment
+bar();
+}
+//indent end
+
+//indent run
+void
+c99_comment(void)
+{
+	foo();			// C99 comment
+	bar();
+}
+//indent end
+
+
 //indent input
-// TODO: add input
+void
+comment_at_end_of_function(void)
+{
+	if (cond)
+		statement();
+	// comment
+}
 //indent end
 
 //indent run-equals-input
+
+
+//indent input
+int		decl;
+// end-of-line comment at the end of the file
+//indent end
+
+//indent run-equals-input
+
+
+/* A form feed in the middle of a comment is an ordinary character. */
+//indent input
+/*
+ * AE
+ */
+/*-AE*/
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * At the beginning of a block comment or after a '*', '\f' is special. This
+ * is an implementation detail that should not be visible from the outside.
+ * Form feeds in comments are seldom used though, so this is no problem.
+ */
+//indent input
+/* comment*/
+/*text* comment*/
+//indent end
+
+//indent run
+/* * comment */
+/* text* * comment */
+//indent end
+
+/*
+ * Without 'star_comment_cont', there is no separator between the form feed
+ * and the surrounding text.
+ */
+//indent run -nsc
+/*comment */
+/* text*comment */
+//indent end
+
+//indent run-equals-input -nfc1
+
+
+/*
+ * A completely empty line in a box comment must be copied unmodified to the
+ * output. This is done in process_comment by adding a space to the end of an
+ * otherwise empty comment. This space forces output_complete_line to add some output,
+ * but the trailing space is discarded, resulting in an empty line.
+ */
+//indent input
+/*- comment
+
+
+end */
+//indent end
+
+//indent run-equals-input -nfc1
+
+
+//indent input
+/* comment comment comment comment Ümläute */
+//indent end
+
+//indent run -l40
+/*
+ * comment comment comment comment
+ * Ümläute
+ */
+//indent end
+
+
+//indent input
+int f(void)
+{
+	if (0)
+		/* 12 1234 123 123456 1234 1234567 123 1234.  */;
+}
+//indent end
+
+/* The comment is too long to fit in a single line. */
+//indent run -l54
+int
+f(void)
+{
+	if (0)
+		/*
+		 * 12 1234 123 123456 1234 1234567 123
+		 * 1234.
+		  */ ;
+}
+//indent end
+
+/* The comment fits in a single line. */
+//indent run
+int
+f(void)
+{
+	if (0)
+		 /* 12 1234 123 123456 1234 1234567 123 1234.  */ ;
+}
+//indent end
+
+
+/*
+ * Test for an edge case in comment handling, having a block comment inside
+ * a line comment. Before NetBSD pr_comment.c 1.96 from 2021-11-04, indent
+ * wrongly assumed that the comment would end at the '*' '/', tokenizing the
+ * second word 'still' as a type_outside_parentheses.
+ */
+//indent input
+/* block comment */
+// line comment /* still a line comment */ still a line comment
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Tests for comments that are not wrapped.
+ */
+//indent input
+/*-	tab space	tab space */
+/*-	very-long-word-that-cannot-be-broken very-long-word-that-cannot-be-broken */
+/*-	very-long-word-that-cannot-be-broken very-long-word-that-cannot-be-broken */
+//indent end
+
+//indent run-equals-input -l5
+
+//indent run-equals-input -l32
+
+
+/*
+ * Test for form feeds in nowrap comments.
+ */
+//indent input
+/*-*/
+/*-<>*/
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Test two completely empty lines in a wrap comment. The second empty line
+ * covers the condition ps.next_col_1 in copy_comment_wrap.
+ */
+//indent input
+/* line 1
+
+
+line 4 */
+//indent end
+
+//indent run
+/*
+ * line 1
+ *
+ *
+ * line 4
+ */
+//indent end
+
+//indent run-equals-input -nfc1
+
+//indent run-equals-input -nfc1 -nsc
+
+//indent run -nsc
+/*
+line 1
+
+
+line 4
+ */
+//indent end
+
+//indent run-equals-input -nsc -ncdb
+
+
+/*
+ * Cover the code for expanding the comment buffer. As of 2021-11-07, the
+ * default buffer size is 200. To actually fill the comment buffer, there must
+ * be a single line of a comment that is longer than 200 bytes.
+ */
+//indent input
+/*-_____10________20________30________40________50________60________70________80________90_______100_______110_______120_______130_______140_______150_______160_______170_______180_______190_______200 */
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Cover the code for expanding the comment buffer in com_terminate. As of
+ * 2021-11-07, the default buffer size is 200, with a safety margin of 1 at
+ * the beginning and another safety margin of 5 at the end. To force the
+ * comment buffer to be expanded in com_terminate, the comment must be
+ * exactly 193 bytes long.
+ */
+//indent input
+/*-_____10________20________30________40________50________60________70________80________90_______100_______110_______120_______130_______140_______150_______160_______170_______180_______190 */
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Since 2019-04-04 and before pr_comment.c 1.123 from 2021-11-25, the
+ * function analyze_comment wrongly joined the two comments.
+ */
+//indent input
+/*
+ *//*
+join*/
+//indent end
+
+/* FIXME: The last line of the first comment must not be modified. */
+//indent run -nfc1
+/*
+  *//*
+  * join
+  */
+//indent end
+
+
+/*
+ * Since 2019-04-04 and before pr_comment.c 1.123 from 2021-11-25, the
+ * function analyze_comment generated malformed output by terminating the
+ * first comment but omitting the start of the second comment.
+ */
+//indent input
+/*
+*//*
+error*/
+//indent end
+
+//indent run -nfc1
+/*
+ *//*
+  * error
+  */
+//indent end
Index: src/tests/usr.bin/indent/lsym_do.c
diff -u src/tests/usr.bin/indent/lsym_do.c:1.3 src/tests/usr.bin/indent/lsym_do.c:1.4
--- src/tests/usr.bin/indent/lsym_do.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_do.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_do.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_do.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_do, which represents the keyword 'do' that starts
@@ -12,7 +12,19 @@
  */
 
 //indent input
-// TODO: add input
+void
+function(void)
+{
+	do stmt();while(cond);
+}
 //indent end
 
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+	do
+		stmt();
+	while (cond);
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_eof.c
diff -u src/tests/usr.bin/indent/lsym_eof.c:1.3 src/tests/usr.bin/indent/lsym_eof.c:1.4
--- src/tests/usr.bin/indent/lsym_eof.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_eof.c	Sun Apr 24 10:36:37 2022
@@ -1,11 +1,16 @@
-/* $NetBSD: lsym_eof.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_eof.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_eof, which represents the end of the input file.
+ *
+ * The end of a file typically occurs after a top-level declaration, or after
+ * a preprocessing directive. Everything else is a syntax error.
  */
 
 //indent input
-// TODO: add input
+int decl;
 //indent end
 
-//indent run-equals-input
+//indent run
+int		decl;
+//indent end
Index: src/tests/usr.bin/indent/lsym_for.c
diff -u src/tests/usr.bin/indent/lsym_for.c:1.3 src/tests/usr.bin/indent/lsym_for.c:1.4
--- src/tests/usr.bin/indent/lsym_for.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_for.c	Sun Apr 24 10:36:37 2022
@@ -1,12 +1,89 @@
-/* $NetBSD: lsym_for.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_for.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_for, which represents the keyword 'for' that
  * starts a 'for' loop.
+ *
+ * Most 'for' loops have 3 expressions in their head.  Each of these
+ * expressions is optional though.
+ *
+ * When all 3 expressions are omitted, the 'for' loop is often called a
+ * 'forever' loop.
  */
 
 //indent input
-// TODO: add input
+void
+example(void)
+{
+	for (;;)
+		break;
+	for (var = value;;)
+		break;
+	for (; cond;)
+		break;
+	for (;; i++)
+		break;
+}
+//indent end
+
+//indent run-equals-input
+
+
+//indent input
+void
+function(void)
+{
+	for (int i = 0; i < 6; i++)
+		print_char("hello\n"[i]);
+	forever {
+		stmt();
+	}
+}
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Indent can cope with various syntax errors, which may be caused by
+ * syntactic macros like 'forever' or 'foreach'.
+ */
+//indent input
+#define forever for (;;)
+#define foreach(list, it) for (it = list.first; it != NULL; it = it->next)
+
+void
+function(void)
+{
+	forever
+		stmt();
+
+	forever {
+		stmt();
+	}
+
+/* $ No space after 'foreach' since it looks like a function name. */
+	foreach(list, it)
+		println(it->data);
+
+/* $ No space after 'foreach' since it looks like a function name. */
+	foreach(list, it) {
+		println(it->data);
+	}
+}
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Another variant of a 'for' loop, seen in sys/arch/arm/apple/apple_intc.c.
+ */
+//indent input
+{
+	for (CPU_INFO_FOREACH(cii, ci)) {
+	}
+}
 //indent end
 
 //indent run-equals-input
Index: src/tests/usr.bin/indent/lsym_form_feed.c
diff -u src/tests/usr.bin/indent/lsym_form_feed.c:1.3 src/tests/usr.bin/indent/lsym_form_feed.c:1.4
--- src/tests/usr.bin/indent/lsym_form_feed.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_form_feed.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_form_feed.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_form_feed.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_form_feed, which represents a form feed, a special
@@ -8,7 +8,41 @@
  */
 
 //indent input
-// TODO: add input
+void function_1(void);
+
+void function_2(void);
 //indent end
 
-//indent run-equals-input
+//indent run -di0
+void function_1(void);
+
+/* $ XXX: The form feed is not preserved. */
+/* $ XXX: Why 2 empty lines? */
+
+void function_2(void);
+//indent end
+
+
+/*
+ * Test form feed after 'if (expr)', which is handled in search_stmt.
+ */
+//indent input
+void function(void)
+{
+	if (expr)
+	 /* <-- form feed */
+	{
+	}
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+	if (expr) {
+		/* $ XXX: The form feed has disappeared. */
+		/* <-- form feed */
+	}
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_funcname.c
diff -u src/tests/usr.bin/indent/lsym_funcname.c:1.3 src/tests/usr.bin/indent/lsym_funcname.c:1.4
--- src/tests/usr.bin/indent/lsym_funcname.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_funcname.c	Sun Apr 24 10:36:37 2022
@@ -1,15 +1,23 @@
-/* $NetBSD: lsym_funcname.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_funcname.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_funcname, which is an identifier that is followed
  * by an opening parenthesis.
  *
+ * TODO: Document how lsym_funcname is handled differently from lsym_word.
+ *
  * See also:
  *	lsym_word.c
  */
 
 //indent input
-// TODO: add input
+void
+function(void)
+{
+	func();
+	(func)();
+	func(1, 2, 3);
+}
 //indent end
 
 //indent run-equals-input
Index: src/tests/usr.bin/indent/lsym_if.c
diff -u src/tests/usr.bin/indent/lsym_if.c:1.3 src/tests/usr.bin/indent/lsym_if.c:1.4
--- src/tests/usr.bin/indent/lsym_if.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_if.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_if.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_if.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_if, which represents the keyword 'if' that starts
@@ -6,7 +6,18 @@
  */
 
 //indent input
-// TODO: add input
+void
+function(void)
+{
+	if(cond)stmt();
+}
 //indent end
 
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+	if (cond)
+		stmt();
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_newline.c
diff -u src/tests/usr.bin/indent/lsym_newline.c:1.3 src/tests/usr.bin/indent/lsym_newline.c:1.4
--- src/tests/usr.bin/indent/lsym_newline.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_newline.c	Sun Apr 24 10:36:37 2022
@@ -1,17 +1,34 @@
-/* $NetBSD: lsym_newline.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_newline.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_newline, which represents a forced line break in
  * the source code.
  *
- * Indent preserves most of the line breaks from the original code.
+ * A newline ends an end-of-line comment that has been started with '//'.
+ *
+ * When a line ends with a backslash immediately followed by '\n', these two
+ * characters are merged and continue the logical line (C11 5.1.1.2p1i2).
+ *
+ * In other contexts, a newline is an ordinary space character from a
+ * compiler's point of view. Indent preserves most line breaks though.
  *
  * See also:
  *	lsym_form_feed.c
  */
 
+
 //indent input
-// TODO: add input
+int var=
+1
+	+2
+		+3
+			+4;
 //indent end
 
-//indent run-equals-input
+//indent run
+int		var =
+1
++ 2
++ 3
++ 4;
+//indent end
Index: src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c
diff -u src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c:1.3 src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c:1.4
--- src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_rparen_or_rbracket.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_rparen_or_rbracket.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_rparen_or_rbracket, which represents ')' or ']',
@@ -9,7 +9,12 @@
  */
 
 //indent input
-// TODO: add input
+int var = (3);
+int cast = (int)3;
+int cast = (int)(3);
+int call = function(3);
+int array[3] = {1, 2, 3};
+int array[3] = {[2] = 3};
 //indent end
 
-//indent run-equals-input
+//indent run-equals-input -di0
Index: src/tests/usr.bin/indent/lsym_semicolon.c
diff -u src/tests/usr.bin/indent/lsym_semicolon.c:1.3 src/tests/usr.bin/indent/lsym_semicolon.c:1.4
--- src/tests/usr.bin/indent/lsym_semicolon.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_semicolon.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_semicolon.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_semicolon.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_semicolon, which represents ';' in these contexts:
@@ -12,7 +12,32 @@
  */
 
 //indent input
-// TODO: add input
+struct {
+	int member;
+} global_var;
 //indent end
 
-//indent run-equals-input
+//indent run-equals-input -di0
+
+
+//indent input
+void
+function(void)
+{
+	for ( ; ; )
+		stmt();
+	for (;;)
+		stmt();
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+	for (;;)
+		stmt();
+	for (;;)
+		stmt();
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_storage_class.c
diff -u src/tests/usr.bin/indent/lsym_storage_class.c:1.3 src/tests/usr.bin/indent/lsym_storage_class.c:1.4
--- src/tests/usr.bin/indent/lsym_storage_class.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_storage_class.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_storage_class.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_storage_class.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_storage_class, which represents a storage class as
@@ -6,7 +6,9 @@
  */
 
 //indent input
-// TODO: add input
+static int definition_with_internal_linkage;
+extern int declaration_with_external_linkage;
+int definition_with_external_linkage;
 //indent end
 
-//indent run-equals-input
+//indent run-equals-input -di0
Index: src/tests/usr.bin/indent/lsym_while.c
diff -u src/tests/usr.bin/indent/lsym_while.c:1.3 src/tests/usr.bin/indent/lsym_while.c:1.4
--- src/tests/usr.bin/indent/lsym_while.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_while.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_while.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_while.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token 'lsym_while', which represents the keyword 'while' that
@@ -6,7 +6,22 @@
  */
 
 //indent input
-// TODO: add input
+void
+function(void)
+{
+	while(cond)stmt();
+	do stmt();while(cond);
+}
 //indent end
 
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+	while (cond)
+		stmt();
+	do
+		stmt();
+	while (cond);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_decl.c
diff -u src/tests/usr.bin/indent/psym_decl.c:1.3 src/tests/usr.bin/indent/psym_decl.c:1.4
--- src/tests/usr.bin/indent/psym_decl.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_decl.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_decl.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_decl.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the parser symbol psym_decl, which represents a declaration.
@@ -6,13 +6,33 @@
  * Since C99, declarations and statements can be mixed in blocks.
  *
  * A label can be followed by a statement but not by a declaration.
+ *
+ * Indent distinguishes global and local declarations.
+ *
+ * Declarations can be for functions or for variables.
  */
 
 // TODO: prove that psym_decl can only ever occur at the top of the stack.
 // TODO: delete decl_level if the above is proven.
 
 //indent input
-// TODO: add input
+int global_var;
+int global_array = [1,2,3,4];
+int global_array = [
+1
+,2,
+3,
+4,
+];
 //indent end
 
-//indent run-equals-input
+//indent run -di0
+int global_var;
+int global_array = [1, 2, 3, 4];
+int global_array = [
+		    1
+		    ,2,
+		    3,
+		    4,
+];
+//indent end
Index: src/tests/usr.bin/indent/psym_do.c
diff -u src/tests/usr.bin/indent/psym_do.c:1.3 src/tests/usr.bin/indent/psym_do.c:1.4
--- src/tests/usr.bin/indent/psym_do.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_do.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_do.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_do.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the parser symbol psym_do, which represents the state after
@@ -6,7 +6,43 @@
  */
 
 //indent input
-// TODO: add input
+void function(void) {
+	do stmt(); while (0);
+	do {} while (0);
+}
 //indent end
 
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+	do
+		stmt();
+	while (0);
+	do {
+	} while (0);
+}
+//indent end
+
+
+/*
+ * The keyword 'do' is followed by a statement, as opposed to 'while', which
+ * is followed by a parenthesized expression.
+ */
+//indent input
+void
+function(void)
+{
+	do(var)--;while(var>0);
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+	do
+		(var)--;
+	while (var > 0);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_do_stmt.c
diff -u src/tests/usr.bin/indent/psym_do_stmt.c:1.3 src/tests/usr.bin/indent/psym_do_stmt.c:1.4
--- src/tests/usr.bin/indent/psym_do_stmt.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_do_stmt.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_do_stmt.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_do_stmt.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the parser symbol psym_do_stmt, which represents the state after
@@ -7,7 +7,25 @@
  */
 
 //indent input
-// TODO: add input
+void function(void) {
+	do stmt(); while (0);
+	do { stmt(); } while (0);
+	do /* comment */ stmt(); while (0);
+}
 //indent end
 
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+	do
+		stmt();
+	while (0);
+	do {
+		stmt();
+	} while (0);
+	do			/* comment */
+		stmt();
+	while (0);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_else.c
diff -u src/tests/usr.bin/indent/psym_else.c:1.3 src/tests/usr.bin/indent/psym_else.c:1.4
--- src/tests/usr.bin/indent/psym_else.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_else.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_else.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_else.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the parser symbol psym_else, which represents the keyword 'else'
@@ -7,8 +7,67 @@
  * This parser symbol never ends up on the stack itself.
  */
 
+/*
+ * When parsing nested incomplete 'if' statements, the problem of the
+ * 'dangling else' occurs.  It is resolved by binding the 'else' to the
+ * innermost incomplete 'if' statement.
+ *
+ * In 'parse', an if_expr_stmt is reduced to a simple statement, unless the
+ * next token is 'else'. The comment does not influence this since it never
+ * reaches 'parse'.
+ */
+//indent input
+void
+example(bool cond)
+{
+	if (cond)
+	if (cond)
+	if (cond)
+	stmt();
+	else
+	stmt();
+	/* comment */
+	else
+	stmt();
+}
+//indent end
+
+//indent run
+void
+example(bool cond)
+{
+	if (cond)
+		if (cond)
+			if (cond)
+				stmt();
+			else
+				stmt();
+	/* comment */
+		else
+			stmt();
+}
+//indent end
+
+
+/*
+ * The keyword 'else' is followed by a statement, as opposed to 'if', which
+ * is followed by a parenthesized expression.
+ */
 //indent input
-// TODO: add input
+void
+function(void)
+{
+	if(var>0)var=0;else(var=3);
+}
 //indent end
 
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+	if (var > 0)
+		var = 0;
+	else
+		(var = 3);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_stmt.c
diff -u src/tests/usr.bin/indent/psym_stmt.c:1.3 src/tests/usr.bin/indent/psym_stmt.c:1.4
--- src/tests/usr.bin/indent/psym_stmt.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_stmt.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_stmt.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_stmt.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the parser symbol psym_stmt, which represents a statement on the
@@ -8,7 +8,21 @@
  */
 
 //indent input
-// TODO: add input
+#define unless(cond) if (!(cond))
+
+void
+function(void)
+{
+	stmt();
+	stmt;			/* probably some macro */
+
+	unless(cond)
+		stmt();
+}
 //indent end
 
+/*
+ * There is no space after 'unless' since indent cannot know that it is a
+ * syntactic macro, especially not when its definition is in a header file.
+ */
 //indent run-equals-input
Index: src/tests/usr.bin/indent/psym_stmt_list.c
diff -u src/tests/usr.bin/indent/psym_stmt_list.c:1.3 src/tests/usr.bin/indent/psym_stmt_list.c:1.4
--- src/tests/usr.bin/indent/psym_stmt_list.c:1.3	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_stmt_list.c	Sun Apr 24 10:36:37 2022
@@ -1,14 +1,47 @@
-/* $NetBSD: psym_stmt_list.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_stmt_list.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the parser symbol psym_stmt_list, which represents a list of
  * statements.
  *
+ * Since C99, in such a statement list, statements can be intermixed with
+ * declarations.
+ *
  * TODO: explain why psym_stmt and psym_stmt_list are both necessary.
  */
 
 //indent input
-// TODO: add input
+void
+function(void)
+{
+	stmt();
+	int var;
+	stmt();
+	{
+		stmt();
+		int var;
+		stmt();
+	}
+}
 //indent end
 
-//indent run-equals-input
+//indent run-equals-input -ldi0
+
+
+//indent input
+void
+return_after_rbrace(void)
+{
+	{}return;
+}
+//indent end
+
+//indent run
+void
+return_after_rbrace(void)
+{
+	{
+// $ FIXME: The 'return' must go on a separate line.
+	} return;
+}
+//indent end

Index: src/tests/usr.bin/indent/lsym_preprocessing.c
diff -u src/tests/usr.bin/indent/lsym_preprocessing.c:1.4 src/tests/usr.bin/indent/lsym_preprocessing.c:1.5
--- src/tests/usr.bin/indent/lsym_preprocessing.c:1.4	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_preprocessing.c	Sun Apr 24 10:36:37 2022
@@ -1,9 +1,15 @@
-/* $NetBSD: lsym_preprocessing.c,v 1.4 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_preprocessing.c,v 1.5 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_preprocessing, which represents a '#' that starts
  * a preprocessing line.
  *
+ * #define
+ * #ifdef
+ * #include
+ * #line
+ * #pragma
+ *
  * The whole preprocessing line is processed separately from the main source
  * code, without much tokenizing or parsing.
  */
@@ -39,3 +45,171 @@
 // TODO: backslash-newline
 // TODO: block comment
 // TODO: line comment
+
+
+//indent input
+#include <system-header.h>
+#include "local-header.h"
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Nested conditional compilation.
+ */
+//indent input
+#if 0
+#else
+#endif
+
+#if 0 /* if comment */
+#else /* else comment */
+#endif /* endif comment */
+
+#if 0 /* outer if comment */
+#  if nested /* inner if comment */
+#  else /* inner else comment */
+#  endif /* inner endif comment */
+#endif /* outer endif comment */
+//indent end
+
+//indent run
+#if 0
+#else
+#endif
+
+#if 0				/* if comment */
+#else				/* else comment */
+#endif				/* endif comment */
+
+#if 0				/* outer if comment */
+/* $ XXX: The indentation is removed, which can get confusing */
+#if nested			/* inner if comment */
+#else				/* inner else comment */
+#endif				/* inner endif comment */
+#endif				/* outer endif comment */
+//indent end
+
+
+//indent input
+#define multi_line_definition /* first line
+ * middle
+ * final line
+ */ actual_value
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Before indent.c 1.129 from 2021-10-08, indent mistakenly interpreted quotes
+ * in comments as starting a string literal. The '"' in the comment started a
+ * string, the next '"' finished the string, and the following '/' '*' was
+ * interpreted as the beginning of a comment. This comment lasted until the
+ * next '*' '/', which in this test is another preprocessor directive, solely
+ * for symmetry.
+ *
+ * The effect was that the extra space after d2 was not formatted, as that
+ * line was considered part of the comment.
+ */
+//indent input
+#define comment_in_string_literal "/* no comment "
+int this_is_an_ordinary_line_again;
+
+int d1 ;
+#define confuse_d /*"*/ "/*"
+int d2 ;
+#define resolve_d "*/"
+int d3 ;
+
+int s1 ;
+#define confuse_s /*'*/ '/*'
+int s2 ;
+#define resolve_s '*/'
+int s3 ;
+//indent end
+
+//indent run
+#define comment_in_string_literal "/* no comment "
+int		this_is_an_ordinary_line_again;
+
+int		d1;
+#define confuse_d /*"*/ "/*"
+int		d2;
+#define resolve_d "*/"
+int		d3;
+
+int		s1;
+#define confuse_s /*'*/ '/*'
+int		s2;
+#define resolve_s '*/'
+int		s3;
+//indent end
+
+
+/*
+ * A preprocessing directive inside an expression keeps the state about
+ * whether the next operator is unary or binary.
+ */
+//indent input
+int binary_plus = 3
+#define intermediate 1
+	+4;
+int unary_plus =
+#define intermediate 1
+	+ 4;
+//indent end
+
+//indent run
+int		binary_plus = 3
+#define intermediate 1
++ 4;
+int		unary_plus =
+#define intermediate 1
++4;
+//indent end
+
+
+/*
+ * Before io.c 1.135 from 2021-11-26, indent fixed malformed preprocessing
+ * lines that had arguments even though they shouldn't. It is not the task of
+ * an indenter to fix code; that's what a linter is for.
+ */
+//indent input
+#if 0
+#elif 1
+#else if 3
+#endif 0
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Existing comments are indented just like code comments.
+ *
+ * This means that the above wrong preprocessing lines (#else with argument)
+ * need to be fed through indent twice until they become stable. Since
+ * compilers issue warnings about these invalid lines, not much code still has
+ * these, making this automatic fix an edge case.
+ */
+//indent input
+#if 0		/* comment */
+#else		/* comment */
+#endif		/* comment */
+
+#if 0/* comment */
+#else/* comment */
+#endif/* comment */
+//indent end
+
+//indent run
+#if 0				/* comment */
+#else				/* comment */
+#endif				/* comment */
+
+#if 0				/* comment */
+#else				/* comment */
+#endif				/* comment */
+//indent end
Index: src/tests/usr.bin/indent/lsym_tag.c
diff -u src/tests/usr.bin/indent/lsym_tag.c:1.4 src/tests/usr.bin/indent/lsym_tag.c:1.5
--- src/tests/usr.bin/indent/lsym_tag.c:1.4	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_tag.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_tag.c,v 1.4 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_tag.c,v 1.5 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_tag, which represents one of the keywords
@@ -40,3 +40,77 @@ indent_enum_constants(void)
 //indent end
 
 //indent run-equals-input -ci2
+
+
+//indent input
+struct stat {
+	mode_t		st_mode;
+};
+
+union variant {
+	enum {
+	}		tag;
+	int		v_int;
+	long		v_long;
+	bool		v_bool;
+	void	       *v_pointer;
+};
+//indent end
+
+//indent run-equals-input
+
+
+/* See FreeBSD r303485. */
+//indent input
+int f(struct x *a);
+
+void
+t(void)
+{
+	static const struct {
+		int	a;
+		int	b;
+	} c[] = {
+		{ D, E },
+		{ F, G }
+	};
+}
+
+void u(struct x a) {
+	int b;
+	struct y c = (struct y *)&a;
+}
+//indent end
+
+//indent run
+int		f(struct x *a);
+
+void
+t(void)
+{
+	static const struct {
+		int		a;
+		int		b;
+	}		c[] = {
+		{D, E},
+		{F, G}
+	};
+}
+
+void
+u(struct x a)
+{
+	int		b;
+	struct y	c = (struct y *)&a;
+}
+//indent end
+
+
+/* Comment between 'struct' and the tag name; doesn't occur in practice. */
+//indent input
+struct   /* comment */   tag var;
+//indent end
+
+//indent run -di0
+struct /* comment */ tag var;
+//indent end
Index: src/tests/usr.bin/indent/lsym_unary_op.c
diff -u src/tests/usr.bin/indent/lsym_unary_op.c:1.4 src/tests/usr.bin/indent/lsym_unary_op.c:1.5
--- src/tests/usr.bin/indent/lsym_unary_op.c:1.4	Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_unary_op.c	Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_unary_op.c,v 1.4 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_unary_op.c,v 1.5 2022/04/24 10:36:37 rillig Exp $ */
 
 /*
  * Tests for the token lsym_unary_op, which represents a unary operator.
@@ -20,3 +20,37 @@ unary_operators(void)
 //indent end
 
 //indent run-equals-input
+
+
+/*
+ * The unary operators '+' and '-' can occur in long chains.  In these chains,
+ * adjacent '+' must not be merged to '++' since that would be a different
+ * token.  The same applies to '&', but that case is irrelevant in practice
+ * since the address of an address cannot be taken.
+ */
+//indent input
+int var=+3;
+int mixed=+-+-+-+-+-+-+-+-+-+-+-+-+-3;
+int count=~-~-~-~-~-~-~-~-~-~-~-~-~-3;
+int same = + + + + + - - - - - 3;
+//indent end
+
+//indent run -di0
+int var = +3;
+int mixed = +-+-+-+-+-+-+-+-+-+-+-+-+-3;
+int count = ~-~-~-~-~-~-~-~-~-~-~-~-~-3;
+int same = + + + + +- - - - -3;
+//indent end
+
+
+/*
+ * A special kind of unary operator is '->', which additionally suppresses the
+ * next space.
+ */
+//indent input
+int var = p -> member;
+//indent end
+
+//indent run -di0
+int var = p->member;
+//indent end

Added files:

Index: src/tests/usr.bin/indent/edge_cases.c
diff -u /dev/null src/tests/usr.bin/indent/edge_cases.c:1.1
--- /dev/null	Sun Apr 24 10:36:37 2022
+++ src/tests/usr.bin/indent/edge_cases.c	Sun Apr 24 10:36:37 2022
@@ -0,0 +1,42 @@
+/* $NetBSD: edge_cases.c,v 1.1 2022/04/24 10:36:37 rillig Exp $ */
+
+/*
+ * Tests for edge cases in the C programming language that indent does not
+ * support or in which indent behaves strangely.
+ */
+
+/*
+ * Digraphs are replacements for the characters '[', ']', '{', '}' and '#',
+ * which are missing in some exotic restricted source character sets.
+ *
+ * See C99 6.4.6
+ */
+//indent input
+void
+digraphs(void)
+{
+	/* same as 'array[subscript]' */
+	number = array<:subscript:>;
+
+	/* same as '(int){ initializer }' */
+	number = (int)<% initializer %>;
+}
+//indent end
+
+//indent run
+void
+digraphs(void)
+{
+	/* same as 'array[subscript]' */
+// $ XXX: The indentation is completely wrong.
+// $ XXX: The space between 'array' and '<' doesn't belong there.
+number = array <:subscript:>;
+
+	/* same as '(int){ initializer }' */
+// $ XXX: The space between '%' and '>' doesn't belong there.
+	number = (int)<%initializer % >;
+}
+//indent end
+
+/* TODO: test trigraphs, which are as unusual as digraphs */
+/* TODO: test digraphs and trigraphs in string literals, just for fun */
Index: src/tests/usr.bin/indent/fmt_init.c
diff -u /dev/null src/tests/usr.bin/indent/fmt_init.c:1.1
--- /dev/null	Sun Apr 24 10:36:37 2022
+++ src/tests/usr.bin/indent/fmt_init.c	Sun Apr 24 10:36:37 2022
@@ -0,0 +1,39 @@
+/* $NetBSD: fmt_init.c,v 1.1 2022/04/24 10:36:37 rillig Exp $ */
+
+/*
+ * Tests for variable initializations.
+ */
+
+//indent input
+int global = { initializer };
+int global = {
+	initializer
+};
+
+void
+example(void)
+{
+	int local = { initializer };
+	int local = {
+		initializer
+	};
+}
+//indent end
+
+//indent run -di0
+// $ XXX: The spaces around the initializer are gone.
+int global = {initializer};
+int global = {
+	initializer
+};
+
+void
+example(void)
+{
+	// $ XXX: The spaces around the initializer are gone.
+	int local = {initializer};
+	int local = {
+		initializer
+	};
+}
+//indent end

Reply via email to