Thomas Munro <[email protected]> writes:
> I tried teaching pgindent's post_indent subroutine to unmangle the
> multi-line declarations it mangles. That produces correct
> indentation! But it can also produce lines that exceed the column limit
> we would normally wrap at (of course, because pg_bsd_indent had less
> whitespace on the left when it made wrapping decisions). Doh.
> Attached for posterity, but it's useless.
> So I think pg_bsd_indent itself needs to be fixed. I think I know
> where the problem is. lexi.c isn't looking far enough ahead to
> recognise multi-line function declarations:
I experimented with fixing this. I was able to get pg_bsd_indent to
distinguish multi-line function declarations from definitions, but it
turns out that it doesn't help your concern about the lines being too
long after re-indenting. Contrary to what you imagine above, it seems
pg_bsd_indent will not reflow argument lists, regardless of whether it
thinks there needs to be more or less leading whitespace. I'm a bit
surprised that -bc doesn't cause that to happen, but it doesn't (and I'm
not sure we'd really want to force one-parameter-per-line, anyway).
Anyway, the attached hasty-and-undercommented change to pg_bsd_indent
allows removal of the "Move prototype names to the same line as return
type" hack in pgindent, and we then get prototypes with properly
lined-up arguments, but we'll have a lot of places with over-length
lines needing manual fixing, unless somebody wants to find where to
fix that in pg_bsd_indent; I've had my fill of looking at that
spaghetti code for today.
regards, tom lane
diff --git a/indent.h b/indent.h
index 0fffd89..1708dbc 100644
--- a/indent.h
+++ b/indent.h
@@ -41,6 +41,8 @@ void diag2(int, const char *);
void diag3(int, const char *, int);
void diag4(int, const char *, int, int);
void dump_line(void);
+int lookahead(void);
+void lookahead_reset(void);
void fill_buffer(void);
void parse(int);
void pr_comment(void);
diff --git a/io.c b/io.c
index df11094..8d13a52 100644
--- a/io.c
+++ b/io.c
@@ -51,6 +51,13 @@ static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93";
int comment_open;
static int paren_target;
+
+static char *lookahead_buf; /* malloc'd buffer, or NULL initially */
+static char *lookahead_buf_end; /* end+1 of allocated space */
+static char *lookahead_start; /* => next char for fill_buffer() to fetch */
+static char *lookahead_ptr; /* => next char for lookahead() to fetch */
+static char *lookahead_end; /* last+1 valid char in lookahead_buf */
+
static int pad_output(int current, int target);
void
@@ -252,6 +259,58 @@ compute_label_target(void)
: ps.ind_size * (ps.ind_level - label_offset) + 1;
}
+/*
+ * Return the next input character beyond what has been collected into
+ * in_buffer, reading from "input" and saving it in lookahead_buf as needed.
+ * Successive calls get further and further ahead; returns EOF at end of input.
+ * Call lookahead_reset to rescan from just beyond in_buffer.
+ */
+int
+lookahead(void)
+{
+ while (lookahead_ptr >= lookahead_end) { /* nothing saved left to return: read more */
+ int i = getc(input);
+
+ if (i == EOF)
+ return i;
+ if (i == '\0')
+ continue; /* fill_buffer drops nulls, so do we */
+
+ if (lookahead_end >= lookahead_buf_end) {
+ /* Need to allocate or enlarge lookahead_buf */
+ char *new_buf;
+ size_t req;
+
+ if (lookahead_buf == NULL) {
+ req = 64; /* initial buffer size, in bytes */
+ new_buf = malloc(req);
+ } else {
+ req = (lookahead_buf_end - lookahead_buf) * 2; /* double the current size */
+ new_buf = realloc(lookahead_buf, req);
+ }
+ if (new_buf == NULL)
+ errx(1, "too much lookahead required");
+ lookahead_start = new_buf + (lookahead_start - lookahead_buf); /* rebase: realloc may have moved the buffer */
+ lookahead_ptr = new_buf + (lookahead_ptr - lookahead_buf);
+ lookahead_end = new_buf + (lookahead_end - lookahead_buf);
+ lookahead_buf = new_buf;
+ lookahead_buf_end = new_buf + req;
+ }
+
+ *lookahead_end++ = i; /* save the char so fill_buffer can fetch it later */
+ }
+ return (unsigned char) *lookahead_ptr++; /* never negative, so distinct from EOF */
+}
+
+/*
+ * Restart lookahead() at lookahead_start, the next character fill_buffer
+ * would fetch, so that scanning resumes just beyond what's in in_buffer.
+ */
+void
+lookahead_reset(void)
+{
+ lookahead_ptr = lookahead_start; /* saved characters are kept, only the read position moves */
+}
/*
* Copyright (C) 1976 by the Board of Trustees of the University of Illinois
@@ -293,11 +352,16 @@ fill_buffer(void)
p = in_buffer + offset;
in_buffer_limit = in_buffer + size - 2;
}
- if ((i = getc(f)) == EOF) {
- *p++ = ' ';
- *p++ = '\n';
- had_eof = true;
- break;
+ if (lookahead_start < lookahead_end) {
+ i = (unsigned char) *lookahead_start++;
+ } else {
+ lookahead_start = lookahead_ptr = lookahead_end = lookahead_buf;
+ if ((i = getc(f)) == EOF) {
+ *p++ = ' ';
+ *p++ = '\n';
+ had_eof = true;
+ break;
+ }
}
if (i != '\0')
*p++ = i;
diff --git a/lexi.c b/lexi.c
index 3c7bfef..e637e1a 100644
--- a/lexi.c
+++ b/lexi.c
@@ -148,6 +148,39 @@ strcmp_type(const void *e1, const void *e2)
return (strcmp(e1, *(const char * const *)e2));
}
+/*
+ * Starting at tp (the caller passes the '(' that opens an argument list),
+ * skip the parenthesized list, then return true if the next non-whitespace
+ * character is ';' or ',' (just a prototype); false otherwise or at EOF.
+ * Input beyond buf_end is obtained via lookahead().  We do not detect
+ * comments, so unbalanced parens in a comment within the list fool this.
+ */
+static int
+is_prototype(char *tp)
+{
+ int paren_depth = 0; /* number of currently unclosed '(' */
+
+ lookahead_reset(); /* start lookahead from just beyond in_buffer */
+ for (;;) {
+ int c;
+
+ if (tp < buf_end) /* consume what lies before buf_end first */
+ c = *tp++;
+ else {
+ c = lookahead();
+ if (c == EOF)
+ break;
+ }
+ if (c == '(')
+ paren_depth++;
+ else if (c == ')')
+ paren_depth--;
+ else if (paren_depth == 0 && !isspace((unsigned char) c))
+ return (c == ';' || c == ','); /* first non-space char outside all parens decides */
+ }
+ return false; /* hit EOF before finding a deciding character */
+}
+
int
lexi(struct parser_state *state)
{
@@ -348,15 +381,12 @@ lexi(struct parser_state *state)
} /* end of if (found_it) */
if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
state->in_parameter_declaration == 0 && state->block_init == 0) {
- char *tp = buf_ptr;
- while (tp < buf_end)
- if (*tp++ == ')' && (*tp == ';' || *tp == ','))
- goto not_proc;
+ if (!is_prototype(buf_ptr)) {
strncpy(state->procname, token, sizeof state->procname - 1);
if (state->in_decl)
state->in_parameter_declaration = 1;
return (funcname);
- not_proc:;
+ }
}
/*
* The following hack attempts to guess whether or not the current