Re: Emacs vs pg_indent's weird indentation for function declarations

Tom Lane Tue, 14 May 2019 21:44:07 -0700

Thomas Munro <[email protected]> writes:
> I tried teaching pgindent's post_indent subroutine to unmangle the
> multi-line declarations it mangles.  That produces correct
> indentation!  But can also produce lines that exceed the column limit
> we would normally wrap at (of course, because pg_bsd_indent had less
> whitespace on the left when it made wrapping decisions).  Doh.
> Attached for posterity, but it's useless.


> So I think pg_bsd_indent itself needs to be fixed.  I think I know
> where the problem is.  lexi.c isn't looking far ahead enough to
> recognise multi-line function declarations:

I experimented with fixing this.  I was able to get pg_bsd_indent to
distinguish multi-line function declarations from definitions, but it
turns out that it doesn't help your concern about the lines being too
long after re-indenting.  Contrary to what you imagine above, it seems
pg_bsd_indent will not reflow argument lists, regardless of whether it
thinks there needs to be more or less leading whitespace.  I'm a bit
surprised that -bc doesn't cause that to happen, but it doesn't (and I'm
not sure we'd really want to force one-parameter-per-line, anyway).

Anyway, the attached hasty-and-undercommented change to pg_bsd_indent
allows removal of the "Move prototype names to the same line as return
type" hack in pgindent, and we then get prototypes with properly
lined-up arguments, but we'll have a lot of places with over-length
lines needing manual fixing.  Unless somebody wants to find where to
fix that in pg_bsd_indent, but I've had my fill of looking at that
spaghetti code for today.

                        regards, tom lane

diff --git a/indent.h b/indent.h
index 0fffd89..1708dbc 100644
--- a/indent.h
+++ b/indent.h
@@ -41,6 +41,8 @@ void	diag2(int, const char *);
 void	diag3(int, const char *, int);
 void	diag4(int, const char *, int, int);
 void	dump_line(void);
+int	lookahead(void);
+void	lookahead_reset(void);
 void	fill_buffer(void);
 void	parse(int);
 void	pr_comment(void);
diff --git a/io.c b/io.c
index df11094..8d13a52 100644
--- a/io.c
+++ b/io.c
@@ -51,6 +51,13 @@ static char sccsid[] = "@(#)io.c	8.1 (Berkeley) 6/6/93";
 
 int         comment_open;
 static int  paren_target;
+
+static char *lookahead_buf;	/* malloc'd buffer, or NULL initially */
+static char *lookahead_buf_end; /* end+1 of allocated space */
+static char *lookahead_start;	/* => next char for fill_buffer() to fetch */
+static char *lookahead_ptr;	/* => next char for lookahead() to fetch */
+static char *lookahead_end;	/* last+1 valid char in lookahead_buf */
+
 static int pad_output(int current, int target);
 
 void
@@ -252,6 +259,58 @@ compute_label_target(void)
 	: ps.ind_size * (ps.ind_level - label_offset) + 1;
 }
 
+/*
+ * Read data ahead of what has been collected into in_buffer.
+ *
+ * Successive calls get further and further ahead, until we hit EOF.
+ * Call lookahead_reset to rescan from just beyond in_buffer.
+ */
+int
+lookahead(void)
+{
+    while (lookahead_ptr >= lookahead_end) {
+      int i = getc(input);
+
+      if (i == EOF)
+	return i;
+      if (i == '\0')
+	continue;		/* fill_buffer drops nulls, so do we */
+
+      if (lookahead_end >= lookahead_buf_end) {
+	/* Need to allocate or enlarge lookahead_buf */
+	char *new_buf;
+	size_t req;
+
+	if (lookahead_buf == NULL) {
+	  req = 64;
+	  new_buf = malloc(req);
+	} else {
+	  req = (lookahead_buf_end - lookahead_buf) * 2;
+	  new_buf = realloc(lookahead_buf, req);
+	}
+	if (new_buf == NULL)
+	  errx(1, "too much lookahead required");
+	lookahead_start = new_buf + (lookahead_start - lookahead_buf);
+	lookahead_ptr = new_buf + (lookahead_ptr - lookahead_buf);
+	lookahead_end = new_buf + (lookahead_end - lookahead_buf);
+	lookahead_buf = new_buf;
+	lookahead_buf_end = new_buf + req;
+      }
+
+      *lookahead_end++ = i;
+    }
+    return (unsigned char) *lookahead_ptr++;
+}
+
+/*
+ * Reset so that lookahead() will again scan from just beyond what's in
+ * in_buffer.
+ */
+void
+lookahead_reset(void)
+{
+    lookahead_ptr = lookahead_start;
+}
 
 /*
  * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
@@ -293,11 +352,16 @@ fill_buffer(void)
 	    p = in_buffer + offset;
 	    in_buffer_limit = in_buffer + size - 2;
 	}
-	if ((i = getc(f)) == EOF) {
-		*p++ = ' ';
-		*p++ = '\n';
-		had_eof = true;
-		break;
+	if (lookahead_start < lookahead_end) {
+	  i = (unsigned char) *lookahead_start++;
+	} else {
+	  lookahead_start = lookahead_ptr = lookahead_end = lookahead_buf;
+	  if ((i = getc(f)) == EOF) {
+	    *p++ = ' ';
+	    *p++ = '\n';
+	    had_eof = true;
+	    break;
+	  }
 	}
 	if (i != '\0')
 	    *p++ = i;
diff --git a/lexi.c b/lexi.c
index 3c7bfef..e637e1a 100644
--- a/lexi.c
+++ b/lexi.c
@@ -148,6 +148,39 @@ strcmp_type(const void *e1, const void *e2)
     return (strcmp(e1, *(const char * const *)e2));
 }
 
+/*
+ * Scan over a function argument declaration list, then see if it is
+ * followed by ';' or ',' indicating that it's just a prototype.
+ *
+ * We do not detect comments, so you can fool this by putting unbalanced
+ * parens inside a comment within the argument list.  So don't do that.
+ */
+static int
+is_prototype(char *tp)
+{
+  int paren_depth = 0;
+
+  lookahead_reset();
+  for (;;) {
+    int c;
+
+    if (tp < buf_end)
+      c = *tp++;
+    else {
+	c = lookahead();
+	if (c == EOF)
+	  break;
+    }
+    if (c == '(')
+      paren_depth++;
+    else if (c == ')')
+      paren_depth--;
+    else if (paren_depth == 0 && !isspace((unsigned char) c))
+      return (c == ';' || c == ',');
+  }
+  return false;
+}
+
 int
 lexi(struct parser_state *state)
 {
@@ -348,15 +381,12 @@ lexi(struct parser_state *state)
 	}			/* end of if (found_it) */
 	if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
 	    state->in_parameter_declaration == 0 && state->block_init == 0) {
-	    char *tp = buf_ptr;
-	    while (tp < buf_end)
-		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
-		    goto not_proc;
+	  if (!is_prototype(buf_ptr)) {
 	    strncpy(state->procname, token, sizeof state->procname - 1);
 	    if (state->in_decl)
 		state->in_parameter_declaration = 1;
 	    return (funcname);
-    not_proc:;
+	  }
 	}
 	/*
 	 * The following hack attempts to guess whether or not the current

Re: Emacs vs pg_indent's weird indentation for function declarations

Reply via email to