Module Name:    src
Committed By:   rillig
Date:           Sun Nov 28 11:49:11 UTC 2021

Modified Files:
        src/usr.bin/indent: indent.c indent.h io.c

Log Message:
indent: clean up and document input handling

The transformation that moves comments from after an 'if (expr)' to
after the following brace has a large implementation cost (about 300
lines of code) and makes input handling quite complicated. Document the
overall idea to save future readers some time.

No functional change.


To generate a diff of this commit:
cvs rdiff -u -r1.237 -r1.238 src/usr.bin/indent/indent.c
cvs rdiff -u -r1.105 -r1.106 src/usr.bin/indent/indent.h
cvs rdiff -u -r1.142 -r1.143 src/usr.bin/indent/io.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/indent/indent.c
diff -u src/usr.bin/indent/indent.c:1.237 src/usr.bin/indent/indent.c:1.238
--- src/usr.bin/indent/indent.c:1.237	Sat Nov 27 21:15:58 2021
+++ src/usr.bin/indent/indent.c	Sun Nov 28 11:49:10 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.c,v 1.237 2021/11/27 21:15:58 rillig Exp $	*/
+/*	$NetBSD: indent.c,v 1.238 2021/11/28 11:49:10 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@ static char sccsid[] = "@(#)indent.c	5.1
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.237 2021/11/27 21:15:58 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.238 2021/11/28 11:49:10 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
 #endif
@@ -322,7 +322,7 @@ search_stmt_other(lexer_symbol lsym, boo
     }
 
     debug_inp(__func__);
-    inp_comment_rtrim();
+    inp_comment_rtrim_blank();
 
     if (opt.swallow_optional_blanklines ||
 	(!comment_buffered && remove_newlines)) {

Index: src/usr.bin/indent/indent.h
diff -u src/usr.bin/indent/indent.h:1.105 src/usr.bin/indent/indent.h:1.106
--- src/usr.bin/indent/indent.h:1.105	Sat Nov 27 21:15:58 2021
+++ src/usr.bin/indent/indent.h	Sun Nov 28 11:49:10 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.h,v 1.105 2021/11/27 21:15:58 rillig Exp $	*/
+/*	$NetBSD: indent.h,v 1.106 2021/11/28 11:49:10 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -377,7 +377,7 @@ void inp_comment_add_char(char);
 void inp_comment_add_range(const char *, const char *);
 bool inp_comment_complete_block(void);
 bool inp_comment_seen(void);
-void inp_comment_rtrim(void);
+void inp_comment_rtrim_blank(void);
 void inp_comment_rtrim_newline(void);
 void inp_comment_insert_lbrace(void);
 

Index: src/usr.bin/indent/io.c
diff -u src/usr.bin/indent/io.c:1.142 src/usr.bin/indent/io.c:1.143
--- src/usr.bin/indent/io.c:1.142	Sat Nov 27 21:15:58 2021
+++ src/usr.bin/indent/io.c	Sun Nov 28 11:49:10 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: io.c,v 1.142 2021/11/27 21:15:58 rillig Exp $	*/
+/*	$NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@ static char sccsid[] = "@(#)io.c	8.1 (Be
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: io.c,v 1.142 2021/11/27 21:15:58 rillig Exp $");
+__RCSID("$NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
 #endif
@@ -55,18 +55,41 @@ __FBSDID("$FreeBSD: head/usr.bin/indent/
 
 #include "indent.h"
 
+/*
+ * There are 3 modes for reading the input.
+ *
+ * default: In this mode, the input comes from the input file. The buffer
+ * 'inp' contains the current line, terminated with '\n'. The current read
+ * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
+ * pointers are null.
+ *
+ * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
+ * from 'inp', but instead of processing it, it is copied to 'save_com'. The
+ * goal of this mode is to move the comments after the '{', that is to
+ * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
+ * token cannot be part of this transformation, switch to copy-out.
+ *
+ * copy-out: In this mode, the input comes from 'save_com', which contains the
+ * tokens to be placed after the '{'. The input still comes from the range
+ * [inp.s, inp.e), but these two members have been overwritten with pointers
+ * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
+ * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
+ * all tokens from save_com, switch to default mode again.
+ */
 static struct {
     struct buffer inp;		/* one line of input, ready to be split into
-				 * tokens; occasionally this buffer switches
+				 * tokens; occasionally 's' and 'e' switch
 				 * to save_com_buf */
     char save_com_buf[5000];	/* input text is saved here when looking for
 				 * the brace after an if, while, etc */
-    char *save_com_s;		/* start of the comment in save_com_buf */
-    char *save_com_e;		/* end of the comment in save_com_buf */
+    char *save_com_s;		/* start of the comment in save_com_buf, or
+				 * null */
+    char *save_com_e;		/* end of the comment in save_com_buf, or
+				 * null */
 
     char *saved_inp_s;		/* saved value of inp.s when taking input from
-				 * save_com */
-    char *saved_inp_e;		/* saved value of inp.e */
+				 * save_com, or null */
+    char *saved_inp_e;		/* saved value of inp.e, or null */
 } inbuf;
 
 static int paren_indent;
@@ -92,10 +115,6 @@ inp_p(void)
 const char *
 inp_line_start(void)
 {
-    /*
-     * The comment we're about to read usually comes from inp.buf, unless it
-     * has been copied into save_com.
-     */
     return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
 }
 
@@ -149,6 +168,9 @@ debug_inp_buf(const char *name, const ch
 void
 debug_inp(const char *prefix)
 {
+    assert(inp_line_start() <= inbuf.inp.s);
+    assert(inbuf.inp.s <= inbuf.inp.e);
+
     debug_println("%s %s:", __func__, prefix);
     if (inbuf.saved_inp_s == NULL)
 	debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
@@ -231,6 +253,10 @@ inp_comment_init_preproc(void)
 {
     if (inbuf.save_com_e == NULL) {	/* if this is the first comment, we
 					 * must set up the buffer */
+	/*
+	 * XXX: No space is reserved for a potential '{' here, unlike in
+	 * inp_comment_init_comment.
+	 */
 	inbuf.save_com_s = inbuf.save_com_buf;
 	inbuf.save_com_e = inbuf.save_com_s;
     } else {
@@ -269,19 +295,25 @@ inp_comment_seen(void)
 }
 
 void
-inp_comment_rtrim(void)
+inp_comment_rtrim_blank(void)
 {
-    while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
+    while (inbuf.save_com_e > inbuf.save_com_s &&
+	    ch_isblank(inbuf.save_com_e[-1]))
 	inbuf.save_com_e--;
 }
 
 void
 inp_comment_rtrim_newline(void)
 {
-    while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
+    while (inbuf.save_com_e > inbuf.save_com_s &&
+	    inbuf.save_com_e[-1] == '\n')
 	inbuf.save_com_e--;
 }
 
+/*
+ * Switch the input to come from save_com, replaying the copied tokens while
+ * looking for the next '{'.
+ */
 void
 inp_from_comment(void)
 {
@@ -289,7 +321,7 @@ inp_from_comment(void)
     inbuf.saved_inp_s = inbuf.inp.s;
     inbuf.saved_inp_e = inbuf.inp.e;
 
-    inbuf.inp.s = inbuf.save_com_s;	/* redirect lexi input to save_com_s */
+    inbuf.inp.s = inbuf.save_com_s;
     inbuf.inp.e = inbuf.save_com_e;
     inbuf.save_com_s = NULL;
     inbuf.save_com_e = NULL;
@@ -521,6 +553,7 @@ output_complete_line(char line_terminato
 	output_char(line_terminator);
 	ps.stats.lines++;
 
+	/* TODO: rename to blank_line_after_decl */
 	if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
 	    blank_line_before = true;
 	    ps.just_saw_decl = 0;
@@ -643,8 +676,8 @@ parse_indent_comment(void)
     skip_blank(&p);
     if (!skip_string(&p, "INDENT"))
 	return;
-    skip_blank(&p);
 
+    skip_blank(&p);
     if (*p == '*' || skip_string(&p, "ON"))
 	on = true;
     else if (skip_string(&p, "OFF"))
@@ -661,6 +694,10 @@ parse_indent_comment(void)
 
     inhibit_formatting = !on;
     if (on) {
+	/*
+	 * XXX: Does this make sense? Is the handling of blank lines above
+	 * INDENT OFF comments essentially the same?
+	 */
 	blank_lines_to_output = 0;
 	blank_line_after = false;
 	blank_line_before = false;

Reply via email to