Module Name: src
Committed By: rillig
Date: Sun Apr 24 10:36:37 UTC 2022
Modified Files:
src/distrib/sets/lists/tests: mi
src/tests/usr.bin/indent: Makefile fmt_decl.c lsym_binary_op.c
lsym_comment.c lsym_do.c lsym_eof.c lsym_for.c lsym_form_feed.c
lsym_funcname.c lsym_if.c lsym_newline.c lsym_preprocessing.c
lsym_rparen_or_rbracket.c lsym_semicolon.c lsym_storage_class.c
lsym_tag.c lsym_typedef.c lsym_unary_op.c lsym_while.c lsym_word.c
psym_decl.c psym_do.c psym_do_stmt.c psym_else.c psym_stmt.c
psym_stmt_list.c
Added Files:
src/tests/usr.bin/indent: edge_cases.c fmt_init.c
Removed Files:
src/tests/usr.bin/indent: token_binary_op.c token_comment.c
token_decl.c token_do_stmt.c token_end_of_file.c token_for_exprs.c
token_form_feed.c token_funcname.c token_ident.c token_keyword_do.c
token_keyword_do_else.c token_keyword_else.c
token_keyword_for_if_while.c token_keyword_struct_union_enum.c
token_newline.c token_postfix_op.c token_preprocessing.c
token_rparen.c token_semicolon.c token_stmt.c token_stmt_list.c
token_storage_class.c token_string_prefix.c token_switch_expr.c
token_type_def.c token_unary_op.c token_while_expr.c
Log Message:
tests/indent: migrate token tests to other tests
In indent.h 1.49 from 2021-10-25, the enumeration token_type was split
into lexer_symbol and parser_symbol to more clearly express that these
tokens fall into completely different classes of usage patterns.
To generate a diff of this commit:
cvs rdiff -u -r1.1197 -r1.1198 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.44 -r1.45 src/tests/usr.bin/indent/Makefile
cvs rdiff -u -r0 -r1.1 src/tests/usr.bin/indent/edge_cases.c \
src/tests/usr.bin/indent/fmt_init.c
cvs rdiff -u -r1.35 -r1.36 src/tests/usr.bin/indent/fmt_decl.c
cvs rdiff -u -r1.5 -r1.6 src/tests/usr.bin/indent/lsym_binary_op.c \
src/tests/usr.bin/indent/lsym_typedef.c \
src/tests/usr.bin/indent/lsym_word.c
cvs rdiff -u -r1.3 -r1.4 src/tests/usr.bin/indent/lsym_comment.c \
src/tests/usr.bin/indent/lsym_do.c src/tests/usr.bin/indent/lsym_eof.c \
src/tests/usr.bin/indent/lsym_for.c \
src/tests/usr.bin/indent/lsym_form_feed.c \
src/tests/usr.bin/indent/lsym_funcname.c \
src/tests/usr.bin/indent/lsym_if.c \
src/tests/usr.bin/indent/lsym_newline.c \
src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c \
src/tests/usr.bin/indent/lsym_semicolon.c \
src/tests/usr.bin/indent/lsym_storage_class.c \
src/tests/usr.bin/indent/lsym_while.c \
src/tests/usr.bin/indent/psym_decl.c src/tests/usr.bin/indent/psym_do.c \
src/tests/usr.bin/indent/psym_do_stmt.c \
src/tests/usr.bin/indent/psym_else.c src/tests/usr.bin/indent/psym_stmt.c \
src/tests/usr.bin/indent/psym_stmt_list.c
cvs rdiff -u -r1.4 -r1.5 src/tests/usr.bin/indent/lsym_preprocessing.c \
src/tests/usr.bin/indent/lsym_tag.c \
src/tests/usr.bin/indent/lsym_unary_op.c
cvs rdiff -u -r1.13 -r0 src/tests/usr.bin/indent/token_binary_op.c
cvs rdiff -u -r1.30 -r0 src/tests/usr.bin/indent/token_comment.c
cvs rdiff -u -r1.3 -r0 src/tests/usr.bin/indent/token_decl.c \
src/tests/usr.bin/indent/token_do_stmt.c \
src/tests/usr.bin/indent/token_end_of_file.c \
src/tests/usr.bin/indent/token_funcname.c \
src/tests/usr.bin/indent/token_keyword_do.c \
src/tests/usr.bin/indent/token_keyword_do_else.c \
src/tests/usr.bin/indent/token_keyword_else.c \
src/tests/usr.bin/indent/token_keyword_for_if_while.c \
src/tests/usr.bin/indent/token_postfix_op.c \
src/tests/usr.bin/indent/token_rparen.c \
src/tests/usr.bin/indent/token_stmt.c \
src/tests/usr.bin/indent/token_stmt_list.c \
src/tests/usr.bin/indent/token_storage_class.c \
src/tests/usr.bin/indent/token_switch_expr.c \
src/tests/usr.bin/indent/token_type_def.c
cvs rdiff -u -r1.4 -r0 src/tests/usr.bin/indent/token_for_exprs.c \
src/tests/usr.bin/indent/token_newline.c \
src/tests/usr.bin/indent/token_preprocessing.c \
src/tests/usr.bin/indent/token_semicolon.c \
src/tests/usr.bin/indent/token_unary_op.c \
src/tests/usr.bin/indent/token_while_expr.c
cvs rdiff -u -r1.5 -r0 src/tests/usr.bin/indent/token_form_feed.c \
src/tests/usr.bin/indent/token_keyword_struct_union_enum.c \
src/tests/usr.bin/indent/token_string_prefix.c
cvs rdiff -u -r1.7 -r0 src/tests/usr.bin/indent/token_ident.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.1197 src/distrib/sets/lists/tests/mi:1.1198
--- src/distrib/sets/lists/tests/mi:1.1197 Sun Apr 24 08:48:17 2022
+++ src/distrib/sets/lists/tests/mi Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1197 2022/04/24 08:48:17 rillig Exp $
+# $NetBSD: mi,v 1.1198 2022/04/24 10:36:37 rillig Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
#
@@ -4771,6 +4771,7 @@
./usr/tests/usr.bin/indent/declarations.0 tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/declarations.0.stderr tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/declarations.0.stdout tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/edge_cases.c tests-usr.bin-tests compattestfile,atf
./usr/tests/usr.bin/indent/elsecomment.0 tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/elsecomment.0.pro tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/elsecomment.0.stdout tests-obsolete obsolete,atf
@@ -4782,6 +4783,7 @@
./usr/tests/usr.bin/indent/fmt_decl.c tests-usr.bin-tests compattestfile,atf
./usr/tests/usr.bin/indent/fmt_else_comment.c tests-usr.bin-tests compattestfile,atf
./usr/tests/usr.bin/indent/fmt_expr.c tests-usr.bin-tests compattestfile,atf
+./usr/tests/usr.bin/indent/fmt_init.c tests-usr.bin-tests compattestfile,atf
./usr/tests/usr.bin/indent/indent_off_on.c tests-usr.bin-tests compattestfile,atf
./usr/tests/usr.bin/indent/indent_variables.0 tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/indent_variables.0.pro tests-obsolete obsolete,atf
@@ -5253,44 +5255,44 @@
./usr/tests/usr.bin/indent/token-while_expr.0 tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token-while_expr.0.pro tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token-while_expr.0.stdout tests-obsolete obsolete,atf
-./usr/tests/usr.bin/indent/token_binary_op.c tests-usr.bin-tests compattestfile,atf
+./usr/tests/usr.bin/indent/token_binary_op.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_case_label.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_colon.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_comma.c tests-obsolete obsolete,atf
-./usr/tests/usr.bin/indent/token_comment.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_decl.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_do_stmt.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_end_of_file.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_for_exprs.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_form_feed.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_funcname.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_ident.c tests-usr.bin-tests compattestfile,atf
+./usr/tests/usr.bin/indent/token_comment.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_decl.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_do_stmt.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_end_of_file.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_for_exprs.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_form_feed.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_funcname.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_ident.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_if_expr.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_if_expr_stmt.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_if_expr_stmt_else.c tests-obsolete obsolete,atf
-./usr/tests/usr.bin/indent/token_keyword_do.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_do_else.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_else.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_for_if_while.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_keyword_struct_union_enum.c tests-usr.bin-tests compattestfile,atf
+./usr/tests/usr.bin/indent/token_keyword_do.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_do_else.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_else.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_for_if_while.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_keyword_struct_union_enum.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_lbrace.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_lparen.c tests-obsolete obsolete,atf
-./usr/tests/usr.bin/indent/token_newline.c tests-usr.bin-tests compattestfile,atf
+./usr/tests/usr.bin/indent/token_newline.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_period.c tests-obsolete obsolete,atf
-./usr/tests/usr.bin/indent/token_postfix_op.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_preprocessing.c tests-usr.bin-tests compattestfile,atf
+./usr/tests/usr.bin/indent/token_postfix_op.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_preprocessing.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_question.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/token_rbrace.c tests-obsolete obsolete,atf
-./usr/tests/usr.bin/indent/token_rparen.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_semicolon.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_stmt.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_stmt_list.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_storage_class.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_string_prefix.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_switch_expr.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_type_def.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_unary_op.c tests-usr.bin-tests compattestfile,atf
-./usr/tests/usr.bin/indent/token_while_expr.c tests-usr.bin-tests compattestfile,atf
+./usr/tests/usr.bin/indent/token_rparen.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_semicolon.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_stmt.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_stmt_list.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_storage_class.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_string_prefix.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_switch_expr.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_type_def.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_unary_op.c tests-obsolete obsolete,atf
+./usr/tests/usr.bin/indent/token_while_expr.c tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/types_from_file.0 tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/types_from_file.0.list tests-obsolete obsolete,atf
./usr/tests/usr.bin/indent/types_from_file.0.pro tests-obsolete obsolete,atf
Index: src/tests/usr.bin/indent/Makefile
diff -u src/tests/usr.bin/indent/Makefile:1.44 src/tests/usr.bin/indent/Makefile:1.45
--- src/tests/usr.bin/indent/Makefile:1.44 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/Makefile Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.44 2022/04/24 09:04:12 rillig Exp $
+# $NetBSD: Makefile,v 1.45 2022/04/24 10:36:37 rillig Exp $
.include <bsd.own.mk>
@@ -8,10 +8,12 @@ TESTS_SH+= t_misc
TESTS_SH+= t_options
FILESDIR= ${TESTSDIR}
+FILES+= edge_cases.c
FILES+= fmt_block.c
FILES+= fmt_decl.c
FILES+= fmt_else_comment.c
FILES+= fmt_expr.c
+FILES+= fmt_init.c
FILES+= indent_off_on.c
FILES+= label.c
FILES+= lex_char.c
@@ -110,33 +112,6 @@ FILES+= psym_stmt_list.c
FILES+= psym_switch_expr.c
FILES+= psym_while_expr.c
FILES+= t_options.awk
-FILES+= token_binary_op.c
-FILES+= token_comment.c
-FILES+= token_decl.c
-FILES+= token_do_stmt.c
-FILES+= token_end_of_file.c
-FILES+= token_for_exprs.c
-FILES+= token_form_feed.c
-FILES+= token_funcname.c
-FILES+= token_ident.c
-FILES+= token_keyword_do.c
-FILES+= token_keyword_do_else.c
-FILES+= token_keyword_else.c
-FILES+= token_keyword_for_if_while.c
-FILES+= token_keyword_struct_union_enum.c
-FILES+= token_newline.c
-FILES+= token_postfix_op.c
-FILES+= token_preprocessing.c
-FILES+= token_rparen.c
-FILES+= token_semicolon.c
-FILES+= token_stmt.c
-FILES+= token_stmt_list.c
-FILES+= token_storage_class.c
-FILES+= token_string_prefix.c
-FILES+= token_switch_expr.c
-FILES+= token_type_def.c
-FILES+= token_unary_op.c
-FILES+= token_while_expr.c
add-test: .PHONY
@set -eu; \
Index: src/tests/usr.bin/indent/fmt_decl.c
diff -u src/tests/usr.bin/indent/fmt_decl.c:1.35 src/tests/usr.bin/indent/fmt_decl.c:1.36
--- src/tests/usr.bin/indent/fmt_decl.c:1.35 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/fmt_decl.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: fmt_decl.c,v 1.35 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: fmt_decl.c,v 1.36 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for declarations of global variables, external functions, and local
@@ -884,3 +884,24 @@ a(char *fe)
{
}
//indent end
+
+
+/*
+ * Before NetBSD indent.c 1.178 from 2021-10-29, indent removed the blank
+ * before the '=' in the second and third of these function pointer
+ * declarations. This was because indent interpreted the prototype parameters
+ * 'int' and 'int, int' as type casts, which doesn't make sense at all. Fixing
+ * this properly requires large style changes since indent is based on simple
+ * heuristics all over. This didn't change in indent.c 1.178; instead, the
+ * rule for inserting a blank before a binary operator was changed to always
+ * insert a blank, except at the beginning of a line.
+ */
+//indent input
+char *(*fn)() = NULL;
+char *(*fn)(int) = NULL;
+char *(*fn)(int, int) = NULL;
+//indent end
+
+/* XXX: The parameter '(int)' is wrongly interpreted as a type cast. */
+/* XXX: The parameter '(int, int)' is wrongly interpreted as a type cast. */
+//indent run-equals-input -di0
Index: src/tests/usr.bin/indent/lsym_binary_op.c
diff -u src/tests/usr.bin/indent/lsym_binary_op.c:1.5 src/tests/usr.bin/indent/lsym_binary_op.c:1.6
--- src/tests/usr.bin/indent/lsym_binary_op.c:1.5 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_binary_op.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_binary_op.c,v 1.5 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_binary_op.c,v 1.6 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_binary_op, which represents a binary operator in
@@ -74,3 +74,89 @@ int var = expr**ptr;
//indent run -di0
int var = expr * *ptr;
//indent end
+
+
+/*
+ * When indent tokenizes some operators, it allows for
+ * arbitrary repetitions of the operator character, followed by an
+ * arbitrary number of '='. This is used for operators like '&&' or
+ * '|||==='.
+ *
+ * Before 2021-03-07 22:11:01, the comment '//' was treated as an
+ * operator as well, and so was the comment '/////', leading to
+ * unexpected results.
+ *
+ * See lexi.c, lexi, "default:".
+ */
+//indent input
+void
+long_run_of_operators(void)
+{
+ if (a &&&&&&& b)
+ return;
+ if (a |||=== b)
+ return;
+}
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Long chains of '+' and '-' must be split into several operators as the
+ * lexer has to distinguish between '++' and '+' early. The following
+ * sequence is thus tokenized as:
+ *
+ * word "a"
+ * postfix_op "++"
+ * binary_op "++"
+ * unary_op "++"
+ * unary_op "+"
+ * word "b"
+ *
+ * See lexi.c, lexi, "case '+':".
+ */
+//indent input
+void
+joined_unary_and_binary_operators(void)
+{
+ if (a +++++++ b)
+ return;
+}
+//indent end
+
+//indent run
+void
+joined_unary_and_binary_operators(void)
+{
+ if (a++ ++ ++ +b)
+ return;
+}
+//indent end
+
+
+/*
+ * Ensure that the result of the indentation does not depend on whether a
+ * token from the input starts in column 1 or 9.
+ *
+ * See process_binary_op, ps.curr_col_1.
+ */
+//indent input
+int col_1 //
+= //
+1;
+
+int col_9 //
+ = //
+ 9;
+//indent end
+
+//indent run
+int col_1 //
+= //
+1;
+
+int col_9 //
+= //
+9;
+//indent end
Index: src/tests/usr.bin/indent/lsym_typedef.c
diff -u src/tests/usr.bin/indent/lsym_typedef.c:1.5 src/tests/usr.bin/indent/lsym_typedef.c:1.6
--- src/tests/usr.bin/indent/lsym_typedef.c:1.5 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_typedef.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_typedef.c,v 1.5 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_typedef.c,v 1.6 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_typedef, which represents the keyword 'typedef'
@@ -48,3 +48,14 @@ enum {
EC2
} E;
//indent end
+
+
+/*
+ * Contrary to declarations, type definitions are not affected by the option
+ * '-di'.
+ */
+//indent input
+typedef int number;
+//indent end
+
+//indent run-equals-input
Index: src/tests/usr.bin/indent/lsym_word.c
diff -u src/tests/usr.bin/indent/lsym_word.c:1.5 src/tests/usr.bin/indent/lsym_word.c:1.6
--- src/tests/usr.bin/indent/lsym_word.c:1.5 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_word.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_word.c,v 1.5 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_word.c,v 1.6 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_word, which represents a constant, a string
@@ -11,6 +11,8 @@
// TODO: Is '"string"(' syntactically valid in any context?
// TODO: Is '123(' syntactically valid in any context?
// TODO: Would the output of the above depend on -pcs/-npcs?
+// TODO: Add more systematic tests.
+// TODO: Completely cover each state transition in lex_number_state.
//indent input
// TODO: add input
@@ -32,3 +34,114 @@ int var\
//indent run
int var + name = 4;
//indent end
+
+
+//indent input
+wchar_t wide_string[] = L"wide string";
+//indent end
+
+/*
+ * Regardless of the line length, the 'L' must never be separated from the
+ * string literal. Before lexi.c 1.167 from 2021-11-28, the 'L' was a
+ * separate token, which could have resulted in accidental spacing between the
+ * 'L' and the following "".
+ */
+//indent run-equals-input -di0
+
+//indent run-equals-input -di0 -l25
+
+//indent run-equals-input -di0 -l1
+
+
+//indent input
+wchar_t wide_char[] = L'w';
+//indent end
+
+//indent run-equals-input -di0
+
+
+/* Binary number literals, a GCC extension that was standardized in C23. */
+//indent input
+#define b00101010 -1
+void t(void) {
+ unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
+ float x[] = {.7f, 0.7f};
+ unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
+
+ if (0 b00101010)
+ return;
+ /* $ '0r' is not a number base prefix, so the tokens are split. */
+ if (0r12345)
+ return;
+}
+//indent end
+
+//indent run
+#define b00101010 -1
+void
+t(void)
+{
+ unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
+ float x[] = {.7f, 0.7f};
+ unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
+
+ if (0 b00101010)
+ return;
+ if (0 r12345)
+ return;
+}
+//indent end
+
+
+/* Floating point numbers. */
+//indent input
+void t(void) {
+ unsigned long x = 314UL;
+ double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
+ int z = 0b0101;
+ DO_NOTHING;
+ x._y = 5;
+}
+//indent end
+
+//indent run
+void
+t(void)
+{
+ unsigned long x = 314UL;
+ double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
+ int z = 0b0101;
+ DO_NOTHING;
+ x._y = 5;
+}
+//indent end
+
+
+/*
+ * Test identifiers containing '$', which some compilers support as an
+ * extension to the C standard.
+ */
+//indent input
+int $ = jQuery; // just kidding
+const char SYS$LOGIN[]="$HOME";
+//indent end
+
+//indent run
+int $ = jQuery; // just kidding
+const char SYS$LOGIN[] = "$HOME";
+//indent end
+
+
+/*
+ * Test the tokenizer for number constants.
+ *
+ * When the tokenizer reads a token that is invalid as it stands (such as
+ * '0x') but could be extended to form a valid token (such as '0x123'),
+ * indent does not care about this invalid prefix and returns it nevertheless.
+ */
+//indent input
+int unfinished_hex_prefix = 0x;
+double unfinished_hex_float = 0x123p;
+//indent end
+
+//indent run-equals-input -di0
Index: src/tests/usr.bin/indent/lsym_comment.c
diff -u src/tests/usr.bin/indent/lsym_comment.c:1.3 src/tests/usr.bin/indent/lsym_comment.c:1.4
--- src/tests/usr.bin/indent/lsym_comment.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_comment.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_comment.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_comment.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_comment, which starts a comment.
@@ -11,8 +11,1085 @@
* token_comment.c
*/
+/*-
+ * TODO: systematically test comments
+ *
+ * - starting in column 1, with opt.format_col1_comments (-fc1)
+ * - starting in column 1, without opt.format_col1_comments (-nfc1)
+ * - starting in column 9, independent of opt.format_col1_comments (-fc1)
+ * - starting in column 33, the default
+ * - starting in column 65, which is already close to the default right margin
+ * - starting in column 81, spilling into the right margin
+ *
+ * - block comment starting with '/' '*' '-'
+ * - block comment starting with '/' '*' '*'
+ * - block comment starting with '/' '*' '\n'
+ * - end-of-line comment starting with '//'
+ * - end-of-line comment starting with '//x', so without leading space
+ * - block comment starting with '/' '*' 'x', so without leading space
+ *
+ * - block/end-of-line comment to the right of a label
+ * - block/end-of-line comment to the right of code
+ * - block/end-of-line comment to the right of label with code
+ *
+ * - with/without opt.comment_delimiter_on_blankline (-cdb)
+ * - with/without opt.star_comment_cont (-sc)
+ * - with/without opt.format_block_comments (-fbc)
+ * - with varying opt.max_line_length (32, 64, 80, 140)
+ * - with varying opt.unindent_displace (-d0, -d2, -d-5)
+ * - with varying opt.indent_size (3, 4, 8)
+ * - with varying opt.tabsize (3, 4, 8, 16)
+ * - with varying opt.block_comment_max_line_length (-lc60, -lc78, -lc90)
+ * - with varying opt.comment_column (-c0, -c1, -c33, -c80)
+ * - with varying opt.decl_comment_column (-cd0, -cd1, -cd20, -cd33, -cd80)
+ * - with/without ps.decl_on_line
+ * - with/without ps.next_col_1
+ *
+ * - very long comments that overflow the buffer 'com'
+ * - comments that come from save_com
+ * - very long word that already spills over the right margin
+ * - wrap/nowrap comment containing '\n'
+ * - wrap/nowrap comment containing '\f'
+ * - wrap/nowrap comment containing '\t'
+ * - wrap/nowrap comment containing '\b'
+ */
+
+//indent input
+typedef enum x {
+ aaaaaaaaaaaaaaaaaaaaaa = 1 << 0, /* test a */
+ bbbbbbbbbbbbbbbbb = 1 << 1, /* test b */
+ cccccccccccccc = 1 << 1, /* test c */
+ dddddddddddddddddddddddddddddd = 1 << 2 /* test d */
+} x;
+//indent end
+
+//indent run-equals-input -bbb
+
+
+//indent input
+/* See FreeBSD r303597, r303598, r309219, and r309343 */
+void
+t(void) {
+ /*
+ * Old indent wrapped the URL near where this sentence ends.
+ *
+ * https://www.freebsd.org/cgi/man.cgi?query=indent&apropos=0&sektion=0&manpath=FreeBSD+12-current&arch=default&format=html
+ */
+
+ /*
+ * The default maximum line length for comments is 78, and the 'kk' at
+ * the end makes the line exactly 78 bytes long.
+ *
+ * aaaaaa bbbbbb cccccc dddddd eeeeee ffffff ggggg hhhhh iiiii jjjj kk
+ */
+
+ /*
+ * Old indent unnecessarily removed the star comment continuation on the next line.
+ *
+ * *test*
+ */
+
+ /* r309219 Go through linked list, freeing from the malloced (t[-1]) address. */
+
+ /* r309343 */
+}
+//indent end
+
+//indent run -bbb
+/* See FreeBSD r303597, r303598, r309219, and r309343 */
+void
+t(void)
+{
+ /*
+ * Old indent wrapped the URL near where this sentence ends.
+ *
+ * https://www.freebsd.org/cgi/man.cgi?query=indent&apropos=0&sektion=0&manpath=FreeBSD+12-current&arch=default&format=html
+ */
+
+ /*
+ * The default maximum line length for comments is 78, and the 'kk' at
+ * the end makes the line exactly 78 bytes long.
+ *
+ * aaaaaa bbbbbb cccccc dddddd eeeeee ffffff ggggg hhhhh iiiii jjjj kk
+ */
+
+ /*
+ * Old indent unnecessarily removed the star comment continuation on
+ * the next line.
+ *
+ * *test*
+ */
+
+ /*
+ * r309219 Go through linked list, freeing from the malloced (t[-1])
+ * address.
+ */
+
+ /* r309343 */
+}
+//indent end
+
+
+/*
+ * The first Christmas tree is to the right of the code, therefore the comment
+ * is moved to the code comment column; the follow-up lines of that comment
+ * are moved by the same distance, to preserve the internal layout.
+ *
+ * The other Christmas tree is a standalone block comment, therefore the
+ * comment starts in the code column.
+ *
+ * Since the comments occur between psym_if_expr and the following statement,
+ * they are handled by search_stmt_comment.
+ */
+//indent input
+{
+ if (1) /*- a Christmas tree * search_stmt_comment
+ ***
+ ***** */
+ /*- another one * search_stmt_comment
+ ***
+ ***** */
+ 1;
+}
+//indent end
+
+//indent run -bbb
+{
+ if (1) /*- a Christmas tree * search_stmt_comment
+ ***
+ ***** */
+ /*- another one * search_stmt_comment
+ ***
+ ***** */
+ 1;
+}
+//indent end
+
+
+/*
+ * The first Christmas tree is to the right of the code, therefore the comment
+ * is moved to the code comment column; the follow-up lines of that comment
+ * are moved by the same distance, to preserve the internal layout.
+ *
+ * The other Christmas tree is a standalone block comment, therefore the
+ * comment starts in the code column.
+ */
+//indent input
+{
+ if (7) { /*- a Christmas tree *
+ ***
+ ***** */
+ /*- another one *
+ ***
+ ***** */
+ stmt();
+ }
+}
+//indent end
+
+//indent run -bbb
+{
+ if (7) { /*- a Christmas tree *
+ ***
+ ***** */
+ /*- another one *
+ ***
+ ***** */
+ stmt();
+ }
+}
+//indent end
+
+
+//indent input
+int decl;/*-fixed comment
+ fixed comment*/
+//indent end
+
+//indent run -di0
+int decl; /*-fixed comment
+ fixed comment*/
+//indent end
+/*
+ * XXX: The second line of the above comment contains 11 spaces in a row,
+ * instead of using as many tabs as possible.
+ */
+
+
+//indent input
+{
+ if (0)/*-search_stmt_comment |
+ search_stmt_comment |*/
+ ;
+}
+//indent end
+
+//indent run -di0
+{
+ if (0) /*-search_stmt_comment |
+ search_stmt_comment |*/
+ ;
+}
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the right.
+ */
+//indent input
+int decl;/*-fixed comment
+123456789ab fixed comment*/
+//indent end
+
+//indent run -di0
+int decl; /*-fixed comment
+ 123456789ab fixed comment*/
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the right.
+ *
+ * This comment is handled by search_stmt_comment.
+ */
+//indent input
+{
+ if(0)/*-search_stmt_comment
+123456789ab search_stmt_comment |*/
+ ;
+}
+//indent end
+
+//indent run -di0
+{
+ if (0) /*-search_stmt_comment
+ 123456789ab search_stmt_comment |*/
+ ;
+}
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the left. In this case, the internal layout of the comment cannot be
+ * preserved since the second line already starts in column 1.
+ */
+//indent input
+int decl; /*-|fixed comment
+ | minus 12 |
+ | tabs inside |
+ |---|
+|-----------|
+tab1+++ tab2--- tab3+++ tab4--- tab5+++ tab6--- tab7+++fixed comment*/
+//indent end
+
+//indent run -di0
+int decl; /*-|fixed comment
+ | minus 12 |
+| tabs inside |
+|---|
+|-----------|
+tab1+++ tab2--- tab3+++ tab4--- tab5+++ tab6--- tab7+++fixed comment*/
+//indent end
+
+
+/*
+ * Ensure that all text of the comment is preserved when the comment is moved
+ * to the left. In this case, the internal layout of the comment cannot be
+ * preserved since the second line already starts in column 1.
+ *
+ * This comment is processed by search_stmt_comment.
+ */
+//indent input
+{
+ if(0) /*-|search_stmt_comment
+ | minus 12 |
+ | tabs inside |
+ |---|
+|-----------|
+tab1+++ tab2--- tab3+++ tab4--- tab5+++ tab6--- tab7+++fixed comment*/
+ ;
+}
+//indent end
+
+//indent run -di0
+{
+ if (0) /*-|search_stmt_comment
+ | minus 12 |
+| tabs inside |
+|---|
+|-----------|
+tab1+++ tab2--- tab3+++ tab4--- tab5+++ tab6--- tab7+++fixed comment*/
+ ;
+}
+//indent end
+
+
+/*
+ * Ensure that '{' after a search_stmt_comment is preserved.
+ */
+//indent input
+{
+ if(0)/*comment*/{
+ }
+}
+//indent end
+
+/* The comment in the output has moved to the right of the '{'. */
+//indent run
+{
+ if (0) { /* comment */
+ }
+}
+//indent end
+
+
+/*
+ * The following comments test line breaking when the comment ends with a
+ * space.
+ */
+//indent input
+/* 456789 123456789 123456789 12345 */
+/* 456789 123456789 123456789 123456 */
+/* 456789 123456789 123456789 1234567 */
+/* 456789 123456789 123456789 12345678 */
+/* 456789 123456789 123456789 123456789 */
+//indent end
+
+//indent run -l38
+/* 456789 123456789 123456789 12345 */
+/*
+ * 456789 123456789 123456789 123456
+ */
+/*
+ * 456789 123456789 123456789 1234567
+ */
+/*
+ * 456789 123456789 123456789 12345678
+ */
+/*
+ * 456789 123456789 123456789
+ * 123456789
+ */
+//indent end
+
+
+/*
+ * The following comments test line breaking when the comment does not end
+ * with a space. Since indent adds a trailing space to a single-line comment,
+ * this space has to be taken into account when computing the line length.
+ */
+//indent input
+/* x . line length 35*/
+/* x .. line length 36*/
+/* x ... line length 37*/
+/* x .... line length 38*/
+/* x ..... line length 39*/
+/* x ...... line length 40*/
+/* x ....... line length 41*/
+/* x ........ line length 42*/
+//indent end
+
+//indent run -l38
+/* x . line length 35 */
+/* x .. line length 36 */
+/* x ... line length 37 */
+/* x .... line length 38 */
+/*
+ * x ..... line length 39
+ */
+/*
+ * x ...... line length 40
+ */
+/*
+ * x ....... line length 41
+ */
+/*
+ * x ........ line length 42
+ */
+//indent end
+
+
+/*
+ * The different types of comments that indent distinguishes, starting in
+ * column 1 (see options '-fc1' and '-nfc1').
+ */
+//indent input
+/* This is a traditional C block comment. */
+
+// This is a C99 line comment.
+
+/*
+ * This is a box comment since its first line (the one above this line) is
+ * empty.
+ *
+ *
+ *
+ * Its text gets wrapped.
+ * Empty lines serve as paragraphs.
+ */
+
+/**
+ * This is a box comment
+ * that is not re-wrapped.
+ */
+
+/*-
+ * This is a box comment
+ * that is not re-wrapped.
+ * It is often used for copyright declarations.
+ */
+//indent end
+
+//indent run
+/* This is a traditional C block comment. */
+
+// This is a C99 line comment.
+
+/*
+ * This is a box comment since its first line (the one above this line) is
+ * empty.
+ *
+ *
+ *
+ * Its text gets wrapped. Empty lines serve as paragraphs.
+ */
+
+/**
+ * This is a box comment
+ * that is not re-wrapped.
+ */
+
+/*-
+ * This is a box comment
+ * that is not re-wrapped.
+ * It is often used for copyright declarations.
+ */
+//indent end
+
+
+/*
+ * The different types of comments that indent distinguishes, starting in
+ * column 9, so they are independent of the option '-fc1'.
+ */
+//indent input
+void
+function(void)
+{
+ /* This is a traditional C block comment. */
+
+ /*
+ * This is a box comment.
+ *
+ * It starts in column 9, not 1,
+ * therefore it gets re-wrapped.
+ */
+
+ /**
+ * This is a box comment
+ * that is not re-wrapped, even though it starts in column 9, not 1.
+ */
+
+ /*-
+ * This is a box comment
+ * that is not re-wrapped.
+ */
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+ /* This is a traditional C block comment. */
+
+ /*
+ * This is a box comment.
+ *
+ * It starts in column 9, not 1, therefore it gets re-wrapped.
+ */
+
+ /**
+ * This is a box comment
+ * that is not re-wrapped, even though it starts in column 9, not 1.
+ */
+
+ /*-
+ * This is a box comment
+ * that is not re-wrapped.
+ */
+}
+//indent end
+
+
+/*
+ * Comments to the right of declarations.
+ */
+//indent input
+void
+function(void)
+{
+ int decl; /* declaration comment */
+
+ int decl; /* short
+ * multi-line
+ * declaration
+ * comment */
+
+ int decl; /* long single-line declaration comment that is longer than the allowed line width */
+
+ int decl; /* long multi-line declaration comment
+ * that is longer than
+ * the allowed line width */
+
+ int decl; // C99 declaration comment
+
+ {
+ int decl; /* indented declaration */
+ {
+ int decl; /* indented declaration */
+ {
+ int decl; /* indented declaration */
+ {
+ int decl; /* indented declaration */
+ }
+ }
+ }
+ }
+}
+//indent end
+
+//indent run -ldi0
+void
+function(void)
+{
+ int decl; /* declaration comment */
+
+ int decl; /* short multi-line declaration comment */
+
+ int decl; /* long single-line declaration comment that
+ * is longer than the allowed line width */
+
+ int decl; /* long multi-line declaration comment that is
+ * longer than the allowed line width */
+
+ int decl; // C99 declaration comment
+
+ {
+ int decl; /* indented declaration */
+ {
+ int decl; /* indented declaration */
+ {
+ int decl; /* indented declaration */
+ {
+ int decl; /* indented declaration */
+ }
+ }
+ }
+ }
+}
+//indent end
+
+
+/*
+ * Comments to the right of code.
+ */
+//indent input
+void
+function(void)
+{
+ code(); /* code comment */
+ code(); /* code comment _________ to line length 78 */
+ code(); /* code comment __________ to line length 79 */
+ code(); /* code comment ___________ to line length 80 */
+ code(); /* code comment ____________ to line length 81 */
+ code(); /* code comment _____________ to line length 82 */
+
+/* $ In the following comments, the line length is measured after formatting. */
+ code(); /* code comment _________ to line length 78*/
+ code(); /* code comment __________ to line length 79*/
+ code(); /* code comment ___________ to line length 80*/
+ code(); /* code comment ____________ to line length 81*/
+ code(); /* code comment _____________ to line length 82*/
+
+ code(); /* short
+ * multi-line
+ * code
+ * comment */
+
+ code(); /* long single-line code comment that is longer than the allowed line width */
+
+ code(); /* long multi-line code comment
+ * that is longer than
+ * the allowed line width */
+
+ code(); // C99 code comment
+ code(); // C99 code comment ________ to line length 78
+ code(); // C99 code comment _________ to line length 79
+ code(); // C99 code comment __________ to line length 80
+ code(); // C99 code comment ___________ to line length 81
+ code(); // C99 code comment ____________ to line length 82
+
+ if (cond) /* comment */
+ if (cond) /* comment */
+ if (cond) /* comment */
+ if (cond) /* comment */
+ if (cond) /* comment */
+ code(); /* comment */
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+ code(); /* code comment */
+ code(); /* code comment _________ to line length 78 */
+ code(); /* code comment __________ to line length 79 */
+ code(); /* code comment ___________ to line length 80 */
+ code(); /* code comment ____________ to line length 81 */
+ code(); /* code comment _____________ to line length
+ * 82 */
+
+/* $ In the following comments, the line length is measured after formatting. */
+ code(); /* code comment _________ to line length 78 */
+ code(); /* code comment __________ to line length 79 */
+ code(); /* code comment ___________ to line length 80 */
+ code(); /* code comment ____________ to line length 81 */
+ code(); /* code comment _____________ to line length
+ * 82 */
+
+ code(); /* short multi-line code comment */
+
+ code(); /* long single-line code comment that is
+ * longer than the allowed line width */
+
+ code(); /* long multi-line code comment that is longer
+ * than the allowed line width */
+
+/* $ Trailing C99 comments are not wrapped, as indent would not correctly */
+/* $ recognize the continuation lines as continued comments. For block */
+/* $ comments this works since the comment has not ended yet. */
+ code(); // C99 code comment
+ code(); // C99 code comment ________ to line length 78
+ code(); // C99 code comment _________ to line length 79
+ code(); // C99 code comment __________ to line length 80
+ code(); // C99 code comment ___________ to line length 81
+ code(); // C99 code comment ____________ to line length 82
+
+ if (cond) /* comment */
+ if (cond) /* comment */
+ if (cond) /* comment */
+ if (cond) /* comment */
+ if (cond) /* comment */
+ code(); /* comment */
+}
+//indent end
+
+
+//indent input
+/*
+ * this
+ * is a boxed
+ * staircase.
+*
+* Its paragraphs get wrapped.
+
+There may also be
+ lines without asterisks.
+
+ */
+//indent end
+
+//indent run
+/*
+ * this is a boxed staircase.
+ *
+ * Its paragraphs get wrapped.
+ *
+ * There may also be lines without asterisks.
+ *
+ */
+//indent end
+
+
+//indent input
+void loop(void)
+{
+while(cond)/*comment*/;
+
+ while(cond)
+ /*comment*/;
+}
+//indent end
+
+//indent run
+void
+loop(void)
+{
+ while (cond) /* comment */
+ ;
+
+ while (cond)
+/* $ XXX: The spaces around the comment look unintentional. */
+ /* comment */ ;
+}
+//indent end
+
+
+/*
+ * The following comment starts really far to the right. To avoid that each
+ * line only contains a single word, the maximum allowed line width is
+ * extended such that each comment line may contain 22 characters.
+ */
+//indent input
+int global_variable_with_really_long_name_that_reaches_up_to_column_83; /* 1234567890123456789 1 1234567890123456789 12 1234567890123456789 123 1234567890123456789 1234 1234567890123456789 12345 1234567890123456789 123456 */
+//indent end
+
+//indent run
+int global_variable_with_really_long_name_that_reaches_up_to_column_83; /* 1234567890123456789 1
+ * 1234567890123456789 12
+ * 1234567890123456789
+ * 123
+ * 1234567890123456789
+ * 1234
+ * 1234567890123456789
+ * 12345
+ * 1234567890123456789
+ * 123456 */
+//indent end
+
+
+/*
+ * Demonstrates handling of line-end '//' comments.
+ *
+ * Even though this type of comment had been added in C99, indent didn't
+ * support these comments until 2021 and instead messed up the code in
+ * seemingly unpredictable ways. It treated any sequence of '/' as a binary
+ * operator, no matter whether it was '/' or '//' or '/////'.
+ */
+//indent input
+int dummy // comment
+ = // eq
+ 1 // one
+ + // plus
+ 2;// two
+
+/////separator/////
+
+void function(void){}
+
+// Note: removing one of these line-end comments affected the formatting
+// of the main function below, before indent supported '//' comments.
+
+int
+main(void)
+{
+}
+//indent end
+
+//indent run
+int dummy // comment
+= // eq
+1 // one
++ // plus
+2; // two
+
+/////separator/////
+
+void
+function(void)
+{
+}
+
+// Note: removing one of these line-end comments affected the formatting
+// of the main function below, before indent supported '//' comments.
+
+int
+main(void)
+{
+}
+//indent end
+
+
+/*
+ * Between March 2021 and October 2021, indent supported C99 comments only
+ * very basically. It messed up the following code, repeating the identifier
+ * 'bar' twice in a row.
+ */
+//indent input
+void c99_comment(void)
+{
+foo(); // C99 comment
+bar();
+}
+//indent end
+
+//indent run
+void
+c99_comment(void)
+{
+ foo(); // C99 comment
+ bar();
+}
+//indent end
+
+
//indent input
-// TODO: add input
+void
+comment_at_end_of_function(void)
+{
+ if (cond)
+ statement();
+ // comment
+}
//indent end
//indent run-equals-input
+
+
+//indent input
+int decl;
+// end-of-line comment at the end of the file
+//indent end
+
+//indent run-equals-input
+
+
+/* A form feed in the middle of a comment is an ordinary character. */
+//indent input
+/*
+ * AE
+ */
+/*-AE*/
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * At the beginning of a block comment or after a '*', '\f' is special. This
+ * is an implementation detail that should not be visible from the outside.
+ * Form feeds in comments are seldom used though, so this is no problem.
+ */
+//indent input
+/* comment*/
+/*text* comment*/
+//indent end
+
+//indent run
+/* * comment */
+/* text* * comment */
+//indent end
+
+/*
+ * Without 'star_comment_cont', there is no separator between the form feed
+ * and the surrounding text.
+ */
+//indent run -nsc
+/*comment */
+/* text*comment */
+//indent end
+
+//indent run-equals-input -nfc1
+
+
+/*
+ * A completely empty line in a box comment must be copied unmodified to the
+ * output. This is done in process_comment by adding a space to the end of an
+ * otherwise empty comment. This space forces output_complete_line to add some output,
+ * but the trailing space is discarded, resulting in an empty line.
+ */
+//indent input
+/*- comment
+
+
+end */
+//indent end
+
+//indent run-equals-input -nfc1
+
+
+//indent input
+/* comment comment comment comment Ümläute */
+//indent end
+
+//indent run -l40
+/*
+ * comment comment comment comment
+ * Ümläute
+ */
+//indent end
+
+
+//indent input
+int f(void)
+{
+ if (0)
+ /* 12 1234 123 123456 1234 1234567 123 1234. */;
+}
+//indent end
+
+/* The comment is too long to fit in a single line. */
+//indent run -l54
+int
+f(void)
+{
+ if (0)
+ /*
+ * 12 1234 123 123456 1234 1234567 123
+ * 1234.
+ */ ;
+}
+//indent end
+
+/* The comment fits in a single line. */
+//indent run
+int
+f(void)
+{
+ if (0)
+ /* 12 1234 123 123456 1234 1234567 123 1234. */ ;
+}
+//indent end
+
+
+/*
+ * Test for an edge case in comment handling, having a block comment inside
+ * a line comment. Before NetBSD pr_comment.c 1.96 from 2021-11-04, indent
+ * wrongly assumed that the comment would end at the '*' '/', tokenizing the
+ * second word 'still' as a type_outside_parentheses.
+ */
+//indent input
+/* block comment */
+// line comment /* still a line comment */ still a line comment
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Tests for comments that are not wrapped.
+ */
+//indent input
+/*- tab space tab space */
+/*- very-long-word-that-cannot-be-broken very-long-word-that-cannot-be-broken */
+/*- very-long-word-that-cannot-be-broken very-long-word-that-cannot-be-broken */
+//indent end
+
+//indent run-equals-input -l5
+
+//indent run-equals-input -l32
+
+
+/*
+ * Test for form feeds in nowrap comments.
+ */
+//indent input
+/*-*/
+/*-<>*/
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Test two completely empty lines in a wrap comment. The second empty line
+ * covers the condition ps.next_col_1 in copy_comment_wrap.
+ */
+//indent input
+/* line 1
+
+
+line 4 */
+//indent end
+
+//indent run
+/*
+ * line 1
+ *
+ *
+ * line 4
+ */
+//indent end
+
+//indent run-equals-input -nfc1
+
+//indent run-equals-input -nfc1 -nsc
+
+//indent run -nsc
+/*
+line 1
+
+
+line 4
+ */
+//indent end
+
+//indent run-equals-input -nsc -ncdb
+
+
+/*
+ * Cover the code for expanding the comment buffer. As of 2021-11-07, the
+ * default buffer size is 200. To actually fill the comment buffer, there must
+ * be a single line of a comment that is longer than 200 bytes.
+ */
+//indent input
+/*-_____10________20________30________40________50________60________70________80________90_______100_______110_______120_______130_______140_______150_______160_______170_______180_______190_______200 */
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Cover the code for expanding the comment buffer in com_terminate. As of
+ * 2021-11-07, the default buffer size is 200, with a safety margin of 1 at
+ * the beginning and another safety margin of 5 at the end. To force the
+ * comment buffer to be expanded in com_terminate, the comment must be exactly
+ * 193 bytes long.
+ */
+//indent input
+/*-_____10________20________30________40________50________60________70________80________90_______100_______110_______120_______130_______140_______150_______160_______170_______180_______190 */
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Since 2019-04-04 and before pr_comment.c 1.123 from 2021-11-25, the
+ * function analyze_comment wrongly joined the two comments.
+ */
+//indent input
+/*
+ *//*
+join*/
+//indent end
+
+/* FIXME: The last line of the first comment must not be modified. */
+//indent run -nfc1
+/*
+ *//*
+ * join
+ */
+//indent end
+
+
+/*
+ * Since 2019-04-04 and before pr_comment.c 1.123 from 2021-11-25, the
+ * function analyze_comment generated malformed output by terminating the
+ * first comment but omitting the start of the second comment.
+ */
+//indent input
+/*
+*//*
+error*/
+//indent end
+
+//indent run -nfc1
+/*
+ *//*
+ * error
+ */
+//indent end
Index: src/tests/usr.bin/indent/lsym_do.c
diff -u src/tests/usr.bin/indent/lsym_do.c:1.3 src/tests/usr.bin/indent/lsym_do.c:1.4
--- src/tests/usr.bin/indent/lsym_do.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_do.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_do.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_do.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_do, which represents the keyword 'do' that starts
@@ -12,7 +12,19 @@
*/
//indent input
-// TODO: add input
+void
+function(void)
+{
+ do stmt();while(cond);
+}
//indent end
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+ do
+ stmt();
+ while (cond);
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_eof.c
diff -u src/tests/usr.bin/indent/lsym_eof.c:1.3 src/tests/usr.bin/indent/lsym_eof.c:1.4
--- src/tests/usr.bin/indent/lsym_eof.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_eof.c Sun Apr 24 10:36:37 2022
@@ -1,11 +1,16 @@
-/* $NetBSD: lsym_eof.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_eof.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_eof, which represents the end of the input file.
+ *
+ * The end of a file typically occurs after a top-level declaration, or after
+ * a preprocessing directive. Everything else is a syntax error.
*/
//indent input
-// TODO: add input
+int decl;
//indent end
-//indent run-equals-input
+//indent run
+int decl;
+//indent end
Index: src/tests/usr.bin/indent/lsym_for.c
diff -u src/tests/usr.bin/indent/lsym_for.c:1.3 src/tests/usr.bin/indent/lsym_for.c:1.4
--- src/tests/usr.bin/indent/lsym_for.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_for.c Sun Apr 24 10:36:37 2022
@@ -1,12 +1,89 @@
-/* $NetBSD: lsym_for.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_for.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_for, which represents the keyword 'for' that
* starts a 'for' loop.
+ *
+ * Most 'for' loops have 3 expressions in their head. Each of these
+ * expressions is optional though.
+ *
+ * When all 3 expressions are omitted, the 'for' loop is often called a
+ * 'forever' loop.
*/
//indent input
-// TODO: add input
+void
+example(void)
+{
+ for (;;)
+ break;
+ for (var = value;;)
+ break;
+ for (; cond;)
+ break;
+ for (;; i++)
+ break;
+}
+//indent end
+
+//indent run-equals-input
+
+
+//indent input
+void
+function(void)
+{
+ for (int i = 0; i < 6; i++)
+ print_char("hello\n"[i]);
+ forever {
+ stmt();
+ }
+}
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Indent can cope with various syntax errors, which may be caused by
+ * syntactic macros like 'forever' or 'foreach'.
+ */
+//indent input
+#define forever for (;;)
+#define foreach(list, it) for (it = list.first; it != NULL; it = it->next)
+
+void
+function(void)
+{
+ forever
+ stmt();
+
+ forever {
+ stmt();
+ }
+
+/* $ No space after 'foreach' since it looks like a function name. */
+ foreach(list, it)
+ println(it->data);
+
+/* $ No space after 'foreach' since it looks like a function name. */
+ foreach(list, it) {
+ println(it->data);
+ }
+}
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Another variant of a 'for' loop, seen in sys/arch/arm/apple/apple_intc.c.
+ */
+//indent input
+{
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ }
+}
//indent end
//indent run-equals-input
Index: src/tests/usr.bin/indent/lsym_form_feed.c
diff -u src/tests/usr.bin/indent/lsym_form_feed.c:1.3 src/tests/usr.bin/indent/lsym_form_feed.c:1.4
--- src/tests/usr.bin/indent/lsym_form_feed.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_form_feed.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_form_feed.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_form_feed.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_form_feed, which represents a form feed, a special
@@ -8,7 +8,41 @@
*/
//indent input
-// TODO: add input
+void function_1(void);
+
+void function_2(void);
//indent end
-//indent run-equals-input
+//indent run -di0
+void function_1(void);
+
+/* $ XXX: The form feed is not preserved. */
+/* $ XXX: Why 2 empty lines? */
+
+void function_2(void);
+//indent end
+
+
+/*
+ * Test form feed after 'if (expr)', which is handled in search_stmt.
+ */
+//indent input
+void function(void)
+{
+ if (expr)
+ /* <-- form feed */
+ {
+ }
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+ if (expr) {
+ /* $ XXX: The form feed has disappeared. */
+ /* <-- form feed */
+ }
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_funcname.c
diff -u src/tests/usr.bin/indent/lsym_funcname.c:1.3 src/tests/usr.bin/indent/lsym_funcname.c:1.4
--- src/tests/usr.bin/indent/lsym_funcname.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_funcname.c Sun Apr 24 10:36:37 2022
@@ -1,15 +1,23 @@
-/* $NetBSD: lsym_funcname.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_funcname.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_funcname, which is an identifier that is followed
* by an opening parenthesis.
*
+ * TODO: Document how lsym_funcname is handled differently from lsym_word.
+ *
* See also:
* lsym_word.c
*/
//indent input
-// TODO: add input
+void
+function(void)
+{
+ func();
+ (func)();
+ func(1, 2, 3);
+}
//indent end
//indent run-equals-input
Index: src/tests/usr.bin/indent/lsym_if.c
diff -u src/tests/usr.bin/indent/lsym_if.c:1.3 src/tests/usr.bin/indent/lsym_if.c:1.4
--- src/tests/usr.bin/indent/lsym_if.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_if.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_if.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_if.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_if, which represents the keyword 'if' that starts
@@ -6,7 +6,18 @@
*/
//indent input
-// TODO: add input
+void
+function(void)
+{
+ if(cond)stmt();
+}
//indent end
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+ if (cond)
+ stmt();
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_newline.c
diff -u src/tests/usr.bin/indent/lsym_newline.c:1.3 src/tests/usr.bin/indent/lsym_newline.c:1.4
--- src/tests/usr.bin/indent/lsym_newline.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_newline.c Sun Apr 24 10:36:37 2022
@@ -1,17 +1,34 @@
-/* $NetBSD: lsym_newline.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_newline.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_newline, which represents a forced line break in
* the source code.
*
- * Indent preserves most of the line breaks from the original code.
+ * A newline ends an end-of-line comment that has been started with '//'.
+ *
+ * When a line ends with a backslash immediately followed by '\n', these two
+ * characters are merged and continue the logical line (C11 5.1.1.2p1i2).
+ *
+ * In other contexts, a newline is an ordinary space character from a
+ * compiler's point of view. Indent preserves most line breaks though.
*
* See also:
* lsym_form_feed.c
*/
+
//indent input
-// TODO: add input
+int var=
+1
+ +2
+ +3
+ +4;
//indent end
-//indent run-equals-input
+//indent run
+int var =
+1
++ 2
++ 3
++ 4;
+//indent end
Index: src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c
diff -u src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c:1.3 src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c:1.4
--- src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_rparen_or_rbracket.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_rparen_or_rbracket.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_rparen_or_rbracket.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_rparen_or_lbracket, which represents ')' or ']',
@@ -9,7 +9,12 @@
*/
//indent input
-// TODO: add input
+int var = (3);
+int cast = (int)3;
+int cast = (int)(3);
+int call = function(3);
+int array[3] = {1, 2, 3};
+int array[3] = {[2] = 3};
//indent end
-//indent run-equals-input
+//indent run-equals-input -di0
Index: src/tests/usr.bin/indent/lsym_semicolon.c
diff -u src/tests/usr.bin/indent/lsym_semicolon.c:1.3 src/tests/usr.bin/indent/lsym_semicolon.c:1.4
--- src/tests/usr.bin/indent/lsym_semicolon.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_semicolon.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_semicolon.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_semicolon.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_semicolon, which represents ';' in these contexts:
@@ -12,7 +12,32 @@
*/
//indent input
-// TODO: add input
+struct {
+ int member;
+} global_var;
//indent end
-//indent run-equals-input
+//indent run-equals-input -di0
+
+
+//indent input
+void
+function(void)
+{
+ for ( ; ; )
+ stmt();
+ for (;;)
+ stmt();
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+ for (;;)
+ stmt();
+ for (;;)
+ stmt();
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_storage_class.c
diff -u src/tests/usr.bin/indent/lsym_storage_class.c:1.3 src/tests/usr.bin/indent/lsym_storage_class.c:1.4
--- src/tests/usr.bin/indent/lsym_storage_class.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_storage_class.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_storage_class.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_storage_class.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_storage_class, which represents a storage class as
@@ -6,7 +6,9 @@
*/
//indent input
-// TODO: add input
+static int definition_with_internal_linkage;
+extern int declaration_with_external_linkage;
+int definition_with_external_linkage;
//indent end
-//indent run-equals-input
+//indent run-equals-input -di0
Index: src/tests/usr.bin/indent/lsym_while.c
diff -u src/tests/usr.bin/indent/lsym_while.c:1.3 src/tests/usr.bin/indent/lsym_while.c:1.4
--- src/tests/usr.bin/indent/lsym_while.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_while.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_while.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_while.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token 'lsym_while', which represents the keyword 'while' that
@@ -6,7 +6,22 @@
*/
//indent input
-// TODO: add input
+void
+function(void)
+{
+ while(cond)stmt();
+ do stmt();while(cond);
+}
//indent end
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+ while (cond)
+ stmt();
+ do
+ stmt();
+ while (cond);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_decl.c
diff -u src/tests/usr.bin/indent/psym_decl.c:1.3 src/tests/usr.bin/indent/psym_decl.c:1.4
--- src/tests/usr.bin/indent/psym_decl.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_decl.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_decl.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_decl.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the parser symbol psym_decl, which represents a declaration.
@@ -6,13 +6,33 @@
* Since C99, declarations and statements can be mixed in blocks.
*
* A label can be followed by a statement but not by a declaration.
+ *
+ * Indent distinguishes global and local declarations.
+ *
+ * Declarations can be for functions or for variables.
*/
// TODO: prove that psym_decl can only ever occur at the top of the stack.
// TODO: delete decl_level if the above is proven.
//indent input
-// TODO: add input
+int global_var;
+int global_array = [1,2,3,4];
+int global_array = [
+1
+,2,
+3,
+4,
+];
//indent end
-//indent run-equals-input
+//indent run -di0
+int global_var;
+int global_array = [1, 2, 3, 4];
+int global_array = [
+ 1
+ ,2,
+ 3,
+ 4,
+];
+//indent end
Index: src/tests/usr.bin/indent/psym_do.c
diff -u src/tests/usr.bin/indent/psym_do.c:1.3 src/tests/usr.bin/indent/psym_do.c:1.4
--- src/tests/usr.bin/indent/psym_do.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_do.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_do.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_do.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the parser symbol psym_do, which represents the state after
@@ -6,7 +6,43 @@
*/
//indent input
-// TODO: add input
+void function(void) {
+ do stmt(); while (0);
+ do {} while (0);
+}
//indent end
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+ do
+ stmt();
+ while (0);
+ do {
+ } while (0);
+}
+//indent end
+
+
+/*
+ * The keyword 'do' is followed by a statement, as opposed to 'while', which
+ * is followed by a parenthesized expression.
+ */
+//indent input
+void
+function(void)
+{
+ do(var)--;while(var>0);
+}
+//indent end
+
+//indent run
+void
+function(void)
+{
+ do
+ (var)--;
+ while (var > 0);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_do_stmt.c
diff -u src/tests/usr.bin/indent/psym_do_stmt.c:1.3 src/tests/usr.bin/indent/psym_do_stmt.c:1.4
--- src/tests/usr.bin/indent/psym_do_stmt.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_do_stmt.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_do_stmt.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_do_stmt.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the parser symbol psym_do_stmt, which represents the state after
@@ -7,7 +7,25 @@
*/
//indent input
-// TODO: add input
+void function(void) {
+ do stmt(); while (0);
+ do { stmt(); } while (0);
+ do /* comment */ stmt(); while (0);
+}
//indent end
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+ do
+ stmt();
+ while (0);
+ do {
+ stmt();
+ } while (0);
+ do /* comment */
+ stmt();
+ while (0);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_else.c
diff -u src/tests/usr.bin/indent/psym_else.c:1.3 src/tests/usr.bin/indent/psym_else.c:1.4
--- src/tests/usr.bin/indent/psym_else.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_else.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_else.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_else.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the parser symbol psym_else, which represents the keyword 'else'
@@ -7,8 +7,67 @@
* This parser symbol never ends up on the stack itself.
*/
+/*
+ * When parsing nested incomplete 'if' statements, the problem of the
+ * 'dangling else' occurs. It is resolved by binding the 'else' to the
+ * innermost incomplete 'if' statement.
+ *
+ * In 'parse', an if_expr_stmt is reduced to a simple statement, unless the
+ * next token is 'else'. The comment does not influence this since it never
+ * reaches 'parse'.
+ */
+//indent input
+void
+example(bool cond)
+{
+ if (cond)
+ if (cond)
+ if (cond)
+ stmt();
+ else
+ stmt();
+ /* comment */
+ else
+ stmt();
+}
+//indent end
+
+//indent run
+void
+example(bool cond)
+{
+ if (cond)
+ if (cond)
+ if (cond)
+ stmt();
+ else
+ stmt();
+ /* comment */
+ else
+ stmt();
+}
+//indent end
+
+
+/*
+ * The keyword 'else' is followed by a statement, as opposed to 'if', which
+ * is followed by a parenthesized expression.
+ */
//indent input
-// TODO: add input
+void
+function(void)
+{
+ if(var>0)var=0;else(var=3);
+}
//indent end
-//indent run-equals-input
+//indent run
+void
+function(void)
+{
+ if (var > 0)
+ var = 0;
+ else
+ (var = 3);
+}
+//indent end
Index: src/tests/usr.bin/indent/psym_stmt.c
diff -u src/tests/usr.bin/indent/psym_stmt.c:1.3 src/tests/usr.bin/indent/psym_stmt.c:1.4
--- src/tests/usr.bin/indent/psym_stmt.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_stmt.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: psym_stmt.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_stmt.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the parser symbol psym_stmt, which represents a statement on the
@@ -8,7 +8,21 @@
*/
//indent input
-// TODO: add input
+#define unless(cond) if (!(cond))
+
+void
+function(void)
+{
+ stmt();
+ stmt; /* probably some macro */
+
+ unless(cond)
+ stmt();
+}
//indent end
+/*
+ * There is no space after 'unless' since indent cannot know that it is a
+ * syntactic macro, especially not when its definition is in a header file.
+ */
//indent run-equals-input
Index: src/tests/usr.bin/indent/psym_stmt_list.c
diff -u src/tests/usr.bin/indent/psym_stmt_list.c:1.3 src/tests/usr.bin/indent/psym_stmt_list.c:1.4
--- src/tests/usr.bin/indent/psym_stmt_list.c:1.3 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/psym_stmt_list.c Sun Apr 24 10:36:37 2022
@@ -1,14 +1,47 @@
-/* $NetBSD: psym_stmt_list.c,v 1.3 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: psym_stmt_list.c,v 1.4 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the parser symbol psym_stmt_list, which represents a list of
* statements.
*
+ * Since C99, in such a statement list, statements can be intermixed with
+ * declarations.
+ *
* TODO: explain why psym_stmt and psym_stmt_list are both necessary.
*/
//indent input
-// TODO: add input
+void
+function(void)
+{
+ stmt();
+ int var;
+ stmt();
+ {
+ stmt();
+ int var;
+ stmt();
+ }
+}
//indent end
-//indent run-equals-input
+//indent run-equals-input -ldi0
+
+
+//indent input
+void
+return_after_rbrace(void)
+{
+ {}return;
+}
+//indent end
+
+//indent run
+void
+return_after_rbrace(void)
+{
+ {
+// $ FIXME: The 'return' must go in a separate line.
+ } return;
+}
+//indent end
Index: src/tests/usr.bin/indent/lsym_preprocessing.c
diff -u src/tests/usr.bin/indent/lsym_preprocessing.c:1.4 src/tests/usr.bin/indent/lsym_preprocessing.c:1.5
--- src/tests/usr.bin/indent/lsym_preprocessing.c:1.4 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_preprocessing.c Sun Apr 24 10:36:37 2022
@@ -1,9 +1,15 @@
-/* $NetBSD: lsym_preprocessing.c,v 1.4 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_preprocessing.c,v 1.5 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_preprocessing, which represents a '#' that starts
* a preprocessing line.
*
+ * #define
+ * #ifdef
+ * #include
+ * #line
+ * #pragma
+ *
* The whole preprocessing line is processed separately from the main source
* code, without much tokenizing or parsing.
*/
@@ -39,3 +45,171 @@
// TODO: backslash-newline
// TODO: block comment
// TODO: line comment
+
+
+//indent input
+#include <system-header.h>
+#include "local-header.h"
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Nested conditional compilation.
+ */
+//indent input
+#if 0
+#else
+#endif
+
+#if 0 /* if comment */
+#else /* else comment */
+#endif /* endif comment */
+
+#if 0 /* outer if comment */
+# if nested /* inner if comment */
+# else /* inner else comment */
+# endif /* inner endif comment */
+#endif /* outer endif comment */
+//indent end
+
+//indent run
+#if 0
+#else
+#endif
+
+#if 0 /* if comment */
+#else /* else comment */
+#endif /* endif comment */
+
+#if 0 /* outer if comment */
+/* $ XXX: The indentation is removed, which can get confusing */
+#if nested /* inner if comment */
+#else /* inner else comment */
+#endif /* inner endif comment */
+#endif /* outer endif comment */
+//indent end
+
+
+//indent input
+#define multi_line_definition /* first line
+ * middle
+ * final line
+ */ actual_value
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Before indent.c 1.129 from 2021-10-08, indent mistakenly interpreted quotes
+ * in comments as starting a string literal. The '"' in the comment started a
+ * string, the next '"' finished the string, and the following '/' '*' was
+ * interpreted as the beginning of a comment. This comment lasted until the
+ * next '*' '/', which in this test is another preprocessor directive, solely
+ * for symmetry.
+ *
+ * The effect was that the extra space after d2 was not formatted, as that
+ * line was considered part of the comment.
+ */
+//indent input
+#define comment_in_string_literal "/* no comment "
+int this_is_an_ordinary_line_again;
+
+int d1 ;
+#define confuse_d /*"*/ "/*"
+int d2 ;
+#define resolve_d "*/"
+int d3 ;
+
+int s1 ;
+#define confuse_s /*'*/ '/*'
+int s2 ;
+#define resolve_s '*/'
+int s3 ;
+//indent end
+
+//indent run
+#define comment_in_string_literal "/* no comment "
+int this_is_an_ordinary_line_again;
+
+int d1;
+#define confuse_d /*"*/ "/*"
+int d2;
+#define resolve_d "*/"
+int d3;
+
+int s1;
+#define confuse_s /*'*/ '/*'
+int s2;
+#define resolve_s '*/'
+int s3;
+//indent end
+
+
+/*
+ * A preprocessing directive inside an expression keeps the state about
+ * whether the next operator is unary or binary.
+ */
+//indent input
+int binary_plus = 3
+#define intermediate 1
+ +4;
+int unary_plus =
+#define intermediate 1
+ + 4;
+//indent end
+
+//indent run
+int binary_plus = 3
+#define intermediate 1
++ 4;
+int unary_plus =
+#define intermediate 1
++4;
+//indent end
+
+
+/*
+ * Before io.c 1.135 from 2021-11-26, indent fixed malformed preprocessing
+ * lines that had arguments even though they shouldn't. It is not the task of
+ * an indenter to fix code; that's what a linter is for.
+ */
+//indent input
+#if 0
+#elif 1
+#else if 3
+#endif 0
+//indent end
+
+//indent run-equals-input
+
+
+/*
+ * Existing comments are indented just like code comments.
+ *
+ * This means that the above wrong preprocessing lines (#else with argument)
+ * need to be fed through indent twice until they become stable. Since
+ * compilers issue warnings about these invalid lines, not much code still has
+ * these, making this automatic fix an edge case.
+ */
+//indent input
+#if 0 /* comment */
+#else /* comment */
+#endif /* comment */
+
+#if 0/* comment */
+#else/* comment */
+#endif/* comment */
+//indent end
+
+//indent run
+#if 0 /* comment */
+#else /* comment */
+#endif /* comment */
+
+#if 0 /* comment */
+#else /* comment */
+#endif /* comment */
+//indent end
Index: src/tests/usr.bin/indent/lsym_tag.c
diff -u src/tests/usr.bin/indent/lsym_tag.c:1.4 src/tests/usr.bin/indent/lsym_tag.c:1.5
--- src/tests/usr.bin/indent/lsym_tag.c:1.4 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_tag.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_tag.c,v 1.4 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_tag.c,v 1.5 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_tag, which represents one of the keywords
@@ -40,3 +40,77 @@ indent_enum_constants(void)
//indent end
//indent run-equals-input -ci2
+
+
+//indent input
+struct stat {
+ mode_t st_mode;
+};
+
+union variant {
+ enum {
+ } tag;
+ int v_int;
+ long v_long;
+ bool v_bool;
+ void *v_pointer;
+};
+//indent end
+
+//indent run-equals-input
+
+
+/* See FreeBSD r303485. */
+//indent input
+int f(struct x *a);
+
+void
+t(void)
+{
+ static const struct {
+ int a;
+ int b;
+ } c[] = {
+ { D, E },
+ { F, G }
+ };
+}
+
+void u(struct x a) {
+ int b;
+ struct y c = (struct y *)&a;
+}
+//indent end
+
+//indent run
+int f(struct x *a);
+
+void
+t(void)
+{
+ static const struct {
+ int a;
+ int b;
+ } c[] = {
+ {D, E},
+ {F, G}
+ };
+}
+
+void
+u(struct x a)
+{
+ int b;
+ struct y c = (struct y *)&a;
+}
+//indent end
+
+
+/* Comment between 'struct' and the tag name; doesn't occur in practice. */
+//indent input
+struct /* comment */ tag var;
+//indent end
+
+//indent run -di0
+struct /* comment */ tag var;
+//indent end
Index: src/tests/usr.bin/indent/lsym_unary_op.c
diff -u src/tests/usr.bin/indent/lsym_unary_op.c:1.4 src/tests/usr.bin/indent/lsym_unary_op.c:1.5
--- src/tests/usr.bin/indent/lsym_unary_op.c:1.4 Sun Apr 24 09:04:12 2022
+++ src/tests/usr.bin/indent/lsym_unary_op.c Sun Apr 24 10:36:37 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: lsym_unary_op.c,v 1.4 2022/04/24 09:04:12 rillig Exp $ */
+/* $NetBSD: lsym_unary_op.c,v 1.5 2022/04/24 10:36:37 rillig Exp $ */
/*
* Tests for the token lsym_unary_op, which represents a unary operator.
@@ -20,3 +20,37 @@ unary_operators(void)
//indent end
//indent run-equals-input
+
+
+/*
+ * The unary operators '+' and '-' can occur in long chains. In these chains,
+ * adjacent '+' must not be merged to '++' since that would be a different
+ * token. The same applies to '&', but that case is irrelevant in practice
+ * since the address of an address cannot be taken.
+ */
+//indent input
+int var=+3;
+int mixed=+-+-+-+-+-+-+-+-+-+-+-+-+-3;
+int count=~-~-~-~-~-~-~-~-~-~-~-~-~-3;
+int same = + + + + + - - - - - 3;
+//indent end
+
+//indent run -di0
+int var = +3;
+int mixed = +-+-+-+-+-+-+-+-+-+-+-+-+-3;
+int count = ~-~-~-~-~-~-~-~-~-~-~-~-~-3;
+int same = + + + + +- - - - -3;
+//indent end
+
+
+/*
+ * A special kind of unary operator is '->', which additionally suppresses the
+ * next space.
+ */
+//indent input
+int var = p -> member;
+//indent end
+
+//indent run -di0
+int var = p->member;
+//indent end
Added files:
Index: src/tests/usr.bin/indent/edge_cases.c
diff -u /dev/null src/tests/usr.bin/indent/edge_cases.c:1.1
--- /dev/null Sun Apr 24 10:36:37 2022
+++ src/tests/usr.bin/indent/edge_cases.c Sun Apr 24 10:36:37 2022
@@ -0,0 +1,42 @@
+/* $NetBSD: edge_cases.c,v 1.1 2022/04/24 10:36:37 rillig Exp $ */
+
+/*
+ * Tests for edge cases in the C programming language that indent does not
+ * support or in which indent behaves strangely.
+ */
+
+/*
+ * Digraphs are replacements for the characters '[', ']', '{', '}' and '#',
+ * which are missing in some exotic restricted source character sets.
+ *
+ * See C99 6.4.6
+ */
+//indent input
+void
+digraphs(void)
+{
+ /* same as 'array[subscript]' */
+ number = array<:subscript:>;
+
+ /* same as '(int){ initializer }' */
+ number = (int)<% initializer %>;
+}
+//indent end
+
+//indent run
+void
+digraphs(void)
+{
+ /* same as 'array[subscript]' */
+// $ XXX: The indentation is completely wrong.
+// $ XXX: The space between 'array' and '<' doesn't belong there.
+number = array <:subscript:>;
+
+ /* same as '(int){ initializer }' */
+// $ XXX: The space between '%' and '>' doesn't belong there.
+ number = (int)<%initializer % >;
+}
+//indent end
+
+/* TODO: test trigraphs, which are as unusual as digraphs */
+/* TODO: test digraphs and trigraphs in string literals, just for fun */
Index: src/tests/usr.bin/indent/fmt_init.c
diff -u /dev/null src/tests/usr.bin/indent/fmt_init.c:1.1
--- /dev/null Sun Apr 24 10:36:37 2022
+++ src/tests/usr.bin/indent/fmt_init.c Sun Apr 24 10:36:37 2022
@@ -0,0 +1,39 @@
+/* $NetBSD: fmt_init.c,v 1.1 2022/04/24 10:36:37 rillig Exp $ */
+
+/*
+ * Tests for variable initializations.
+ */
+
+//indent input
+int global = { initializer };
+int global = {
+ initializer
+};
+
+void
+example(void)
+{
+ int local = { initializer };
+ int local = {
+ initializer
+ };
+}
+//indent end
+
+//indent run -di0
+// $ XXX: The spaces around the initializer are gone.
+int global = {initializer};
+int global = {
+ initializer
+};
+
+void
+example(void)
+{
+ // $ XXX: The spaces around the initializer are gone.
+ int local = {initializer};
+ int local = {
+ initializer
+ };
+}
+//indent end