The TODO at the top of gfc_cpp_post_options in gcc/fortran/cpp.cc has
been sitting there for many years:
/* TODO: allow non-traditional modes, e.g. by -cpp-std=...? */
cpp_option->traditional = 1;
This commit finally lifts that restriction. A new -ftraditional-cpp
option is added and is enabled by default, so existing users see no
change; its negative form, -fno-traditional-cpp, selects the full C
preprocessor instead of the historic traditional mode.
For this to be safe in the presence of Fortran source, libcpp gains two
new c_lang values: CLK_GNUF77 for fixed-form and CLK_GNUF90 for
free-form Fortran. When either of them is active the lexer:
- emits "//" as two CPP_DIV tokens, since in Fortran "//" is the
string-concatenation operator rather than a C++ line comment,
- treats '!' outside of preprocessing directives as a Fortran line
comment, so that an unbalanced apostrophe inside a comment
("John's variable" and friends) does not start a runaway C
character literal,
- under CLK_GNUF77 only, additionally treats a column-1 'C', 'c',
'*', 'D' or 'd' as a fixed-form comment-line marker.
In non-traditional mode gfc_cpp_post_options also clears both
discard_comments and discard_comments_in_macro_exp (the effect of -CC),
so that comments survive into the preprocessed output. This is not just
cosmetic, because directive sentinels ("!$OMP", "!$ACC", "!GCC$") are
lexically Fortran comments but carry directive semantics that the
gfortran scanner must still see. Dropping them would silently disable
every parallel or offload directive in the program.
Running a C preprocessor over Fortran source is itself a GNU extension
(the Fortran standard does not specify a preprocessor), and the updated
invoke.texi now says so explicitly.
Bootstrapped and regtested on x86_64-pc-linux-gnu, no new test
failures introduced.
gcc/fortran/ChangeLog:
* cpp.cc (gfc_cpp_post_options): Honour flag_traditional_cpp.
Pick CLK_GNUF77 / CLK_GNUF90 for cpp_create_reader when running
the full C preprocessor, and force discard_comments off in that
mode. Reject -fno-traditional-cpp without -cpp.
* invoke.texi: Document -ftraditional-cpp /
-fno-traditional-cpp and note that running CPP over Fortran is
a GNU extension.
* lang.opt (ftraditional-cpp): New option.
libcpp/ChangeLog:
* include/cpplib.h (enum c_lang): Add CLK_GNUF77 and CLK_GNUF90.
* init.cc (lang_defaults): Add corresponding rows.
* lex.cc (save_comment): Preserve the leading comment-marker
byte from from[-1] instead of hard-coding '/'.
(_cpp_lex_direct): In Fortran modes treat '!' as a line comment
outside of directives and disable the C/C++ "//" line-comment
branch. In fixed-form Fortran additionally recognise the
column-1 'C'/'c'/'*'/'D'/'d' comment-line markers. Share the
comment-emission code via a new fortran_line_comment label.
gcc/testsuite/ChangeLog:
* gfortran.dg/cpp_non_traditional_1.F90: New test.
* gfortran.dg/cpp_non_traditional_2.F90: New test.
* gfortran.dg/cpp_non_traditional_3.F: New test.
* gfortran.dg/cpp_non_traditional_4.f90: New test.
* gfortran.dg/cpp_non_traditional_5.F90: New test.
Signed-off-by: Henri Menke <[email protected]>
---
gcc/fortran/cpp.cc | 31 ++++++++--
gcc/fortran/invoke.texi | 43 +++++++++++++-
gcc/fortran/lang.opt | 4 ++
.../gfortran.dg/cpp_non_traditional_1.F90 | 37 ++++++++++++
.../gfortran.dg/cpp_non_traditional_2.F90 | 23 ++++++++
.../gfortran.dg/cpp_non_traditional_3.F | 32 ++++++++++
.../gfortran.dg/cpp_non_traditional_4.f90 | 13 ++++
.../gfortran.dg/cpp_non_traditional_5.F90 | 18 ++++++
libcpp/include/cpplib.h | 3 +-
libcpp/init.cc | 4 +-
libcpp/lex.cc | 59 ++++++++++++++++++-
11 files changed, 255 insertions(+), 12 deletions(-)
create mode 100644 gcc/testsuite/gfortran.dg/cpp_non_traditional_1.F90
create mode 100644 gcc/testsuite/gfortran.dg/cpp_non_traditional_2.F90
create mode 100644 gcc/testsuite/gfortran.dg/cpp_non_traditional_3.F
create mode 100644 gcc/testsuite/gfortran.dg/cpp_non_traditional_4.f90
create mode 100644 gcc/testsuite/gfortran.dg/cpp_non_traditional_5.F90
diff --git a/gcc/fortran/cpp.cc b/gcc/fortran/cpp.cc
index 6b5f136e4f3..143ad2a7a04 100644
--- a/gcc/fortran/cpp.cc
+++ b/gcc/fortran/cpp.cc
@@ -500,13 +500,24 @@ gfc_cpp_post_options (bool verbose_missing_dir_warn)
|| gfc_cpp_option.print_include_names
|| gfc_cpp_option.no_line_commands
|| gfc_cpp_option.dump_macros
- || gfc_cpp_option.dump_includes))
+ || gfc_cpp_option.dump_includes
+ || !flag_traditional_cpp))
gfc_fatal_error ("To enable preprocessing, use %<-cpp%>");
if (!gfc_cpp_enabled ())
return;
- cpp_in = cpp_create_reader (CLK_GNUC89, NULL, line_table);
+ /* Traditional preprocessing has historically been used regardless of source
+ form, so we keep CLK_GNUC89 in that case for backwards compatibility.
+ Non-traditional mode picks a Fortran-specific language tag instead, so
+ that libcpp recognises "//" as the concatenation operator and '!' (along
+ with the fixed-form column-1 markers) as comment starters. */
+ enum c_lang cpp_lang = CLK_GNUC89;
+ if (!flag_traditional_cpp)
+ cpp_lang = (gfc_current_form == FORM_FIXED
+ ? CLK_GNUF77 : CLK_GNUF90);
+
+ cpp_in = cpp_create_reader (cpp_lang, NULL, line_table);
gcc_assert (cpp_in);
/* The cpp_options-structure defines far more flags than those set here.
@@ -515,10 +526,22 @@ gfc_cpp_post_options (bool verbose_missing_dir_warn)
cpp_option = cpp_get_options (cpp_in);
gcc_assert (cpp_option);
- /* TODO: allow non-traditional modes, e.g. by -cpp-std=...? */
- cpp_option->traditional = 1;
+ cpp_option->traditional = flag_traditional_cpp;
cpp_option->cplusplus_comments = 0;
+ /* When running the full (non-traditional) C preprocessor on Fortran source,
+ preserve comments unconditionally so that the Fortran scanner can
+ re-process them along with the surrounding tokens. This is particularly
+ important because compiler pragmas such as "!$OMP", "!$ACC" and "!GCC$"
+ are lexically Fortran comments but carry directive semantics that the
+ gfortran scanner must still see. Discarding them would silently drop
+ OpenMP / OpenACC / GCC directives from the preprocessed output. */
+ if (!flag_traditional_cpp)
+ {
+ gfc_cpp_option.discard_comments = 0;
+ gfc_cpp_option.discard_comments_in_macro_exp = 0;
+ }
+
cpp_option->cpp_pedantic = pedantic;
cpp_option->dollars_in_ident = flag_dollar_ok;
diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi
index b3e74148a08..5f2391ff66b 100644
--- a/gcc/fortran/invoke.texi
+++ b/gcc/fortran/invoke.texi
@@ -676,6 +676,15 @@ Part 3 of the Fortran 95 standard (ISO/IEC 1539-3:1998) defines
Conditional Compilation, which is not widely used and not directly
supported by the GNU Fortran compiler.
+@cindex GNU extensions, preprocessor
+Note that running a C preprocessor over Fortran source is itself a
+GNU extension. The Fortran standard does not specify a preprocessor,
+so any behavior outside of the lexical conventions described below is
+implementation-defined. Source files that rely on @option{-cpp} (or
+on the implicit preprocessing triggered by an upper-case
+@file{.F}-style extension) are therefore not portable between Fortran
+implementations.
+
The following options control preprocessing of Fortran code:
@table @gcctabopt
@@ -693,12 +702,40 @@ this option to manually enable preprocessing of any kind of Fortran file.
To disable preprocessing of files with any of the above listed extensions,
use the negative form: @option{-nocpp}.
-The preprocessor is run in traditional mode. Any restrictions of the
-file format, especially the limits on line length, apply for
-preprocessed output as well, so it might be advisable to use the
+The preprocessor is run in traditional mode by default. Pass
+@option{-fno-traditional-cpp} to run the full C preprocessor instead.
+See @option{-ftraditional-cpp} below. Any restrictions of the file
+format, especially the limits on line length, apply for preprocessed
+output as well, so it might be advisable to use the
@option{-ffree-line-length-none} or @option{-ffixed-line-length-none}
options.
+@opindex ftraditional-cpp
+@opindex fno-traditional-cpp
+@cindex preprocessor, traditional
+@item -ftraditional-cpp
+@itemx -fno-traditional-cpp
+Pick between traditional and full C preprocessing of Fortran source.
+
+In traditional mode, which is the default, the preprocessor does no
+real tokenization. Fortran-specific lexical constructs such as
+@code{//} (string concatenation), @samp{!} comments, and the
+@samp{'} and @samp{"} string delimiters therefore pass through
+unmolested, exactly as they have done for many years.
+
+With @option{-fno-traditional-cpp} gfortran instead invokes the full
+C preprocessor. This enables ISO-style function-like macros with
+@samp{#} stringification and @samp{##} token pasting. To keep the
+Fortran lexical conventions intact in this mode, libcpp is taught to
+recognize @samp{!} as a line comment outside of preprocessing
+directives and to leave @code{//} alone. In fixed-form sources, a
+@samp{C}, @samp{c}, @samp{*}, @samp{D} or @samp{d} in column 1 is
+additionally treated as a comment-line starter. Comments are
+preserved in the output, as if @option{-CC} had been given, so that
+the gfortran scanner sees them again afterwards (which matters, since
+sentinels like @samp{!$OMP} are lexically comments but carry directive
+semantics).
+
@opindex dM
@cindex preprocessor, debugging
@cindex debugging, preprocessor
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index cbd13aa2019..3867bafc017 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -381,6 +381,10 @@ nocpp
Fortran Negative(cpp)
Disable preprocessing.
+ftraditional-cpp
+Fortran Var(flag_traditional_cpp) Init(1)
+Use traditional preprocessing. This is the default; pass -fno-traditional-cpp to invoke the full C preprocessor instead.
+
d
Fortran Joined
; Documented in common.opt
diff --git a/gcc/testsuite/gfortran.dg/cpp_non_traditional_1.F90 b/gcc/testsuite/gfortran.dg/cpp_non_traditional_1.F90
new file mode 100644
index 00000000000..6ddf3ad02bc
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cpp_non_traditional_1.F90
@@ -0,0 +1,37 @@
+! { dg-do preprocess }
+! { dg-options "-cpp -fno-traditional-cpp -P" }
+!
+! Check that running the full (non-traditional) C preprocessor on Fortran
+! source still preserves the Fortran-specific lexical features that the
+! traditional mode used to protect:
+!
+! - "//" is the Fortran string-concatenation operator and must be emitted
+! as two CPP_DIV tokens, not swallowed as a C++-style line comment.
+! - '!' starts a Fortran line comment, even when the comment text contains
+! unbalanced apostrophes that would otherwise confuse the C
+! character-literal lexer.
+! - Comments are preserved verbatim in the output (as if -C had been
+! given) so that the Fortran scanner can re-process them.
+! - Function-like macros with '#' stringification and "##" token pasting
+! finally work, which is the whole reason for choosing non-traditional
+! mode in the first place.
+
+#define STR(x) #x
+#define CAT(a,b) a##b
+
+program test
+ character(len=12) :: greeting
+ greeting = "hello" // ", world!" ! it's a concat
+ print *, STR(spam)
+ print *, CAT(my_,var)
+end program test
+
+! "//" must survive as two tokens, possibly with whitespace between the
+! slashes because the non-traditional output reformats spacing.
+! { dg-final { scan-file "cpp_non_traditional_1.i" {"hello"[ \t]*/[ \t]*/[ \t]*", world!"} } }
+! The trailing '!' comment must be preserved verbatim.
+! { dg-final { scan-file "cpp_non_traditional_1.i" {! it's a concat} } }
+! Stringification:
+! { dg-final { scan-file "cpp_non_traditional_1.i" {"spam"} } }
+! Token pasting:
+! { dg-final { scan-file "cpp_non_traditional_1.i" {my_var} } }
diff --git a/gcc/testsuite/gfortran.dg/cpp_non_traditional_2.F90 b/gcc/testsuite/gfortran.dg/cpp_non_traditional_2.F90
new file mode 100644
index 00000000000..35cfc27aeb7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cpp_non_traditional_2.F90
@@ -0,0 +1,23 @@
+! { dg-do preprocess }
+! { dg-options "-cpp -fno-traditional-cpp -P" }
+!
+! Check that inside #if / #elif preprocessing expressions '!' still
+! means logical-not, even when the surrounding Fortran source treats it
+! as a line-comment marker.
+
+#define FOO 0
+
+#if !FOO
+ integer, parameter :: not_foo = 1
+#else
+#error "FOO should be falsy here"
+#endif
+
+#if !defined(BAR)
+ integer, parameter :: no_bar = 1
+#else
+#error "BAR should not be defined"
+#endif
+
+! { dg-final { scan-file "cpp_non_traditional_2.i" {not_foo} } }
+! { dg-final { scan-file "cpp_non_traditional_2.i" {no_bar} } }
diff --git a/gcc/testsuite/gfortran.dg/cpp_non_traditional_3.F b/gcc/testsuite/gfortran.dg/cpp_non_traditional_3.F
new file mode 100644
index 00000000000..b3f875e18d8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cpp_non_traditional_3.F
@@ -0,0 +1,32 @@
+! { dg-do preprocess }
+! { dg-options "-cpp -fno-traditional-cpp -P" }
+!
+! Two fixed-form gotchas for non-traditional preprocessing:
+!
+! - A '!' comment containing an unbalanced apostrophe must not start
+! a C character literal that then runs all the way to end-of-file.
+! - The column-1 markers 'C', 'c', '*', 'D' and 'd' must be picked
+! up as fixed-form comment-line starters, even though those
+! characters are otherwise valid identifier or operator beginnings.
+! Without the fixed-form lexer hook in libcpp the apostrophes
+! inside such comment lines would, again, start runaway C
+! character literals.
+
+C This is a column-1 'C' comment with John's apostrophe.
+c This is a column-1 'c' comment, can't be a runaway literal.
+* This is a column-1 '*' comment with don't-tokenize-me.
+D This is a column-1 'D' debug comment.
+d This is a column-1 'd' debug comment.
+ program t
+ integer i
+ i = 1 ! John's variable
+ i = 2 ! don't tokenize the apostrophe
+ end program t
+
+! { dg-final { scan-file "cpp_non_traditional_3.i" {John's variable} } }
+! { dg-final { scan-file "cpp_non_traditional_3.i" {don't tokenize the apostrophe} } }
+! { dg-final { scan-file "cpp_non_traditional_3.i" {C This is a column-1 'C' comment} } }
+! { dg-final { scan-file "cpp_non_traditional_3.i" {c This is a column-1 'c' comment} } }
+! { dg-final { scan-file "cpp_non_traditional_3.i" {\* This is a column-1 '\*' comment} } }
+! { dg-final { scan-file "cpp_non_traditional_3.i" {D This is a column-1 'D' debug comment} } }
+! { dg-final { scan-file "cpp_non_traditional_3.i" {d This is a column-1 'd' debug comment} } }
diff --git a/gcc/testsuite/gfortran.dg/cpp_non_traditional_4.f90 b/gcc/testsuite/gfortran.dg/cpp_non_traditional_4.f90
new file mode 100644
index 00000000000..723a70ac883
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cpp_non_traditional_4.f90
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! { dg-options "-fno-traditional-cpp" }
+!
+! Passing -fno-traditional-cpp on its own should error out, since
+! preprocessing was not actually enabled. The user has to combine it
+! with -cpp (or rely on the implicit preprocessing implied by a .F90
+! extension, which this .f90 file deliberately does not have).
+
+program t
+end program t
+
+! { dg-error "To enable preprocessing, use .-cpp." "" { target *-*-* } 0 }
+! { dg-prune-output "compilation terminated" }
diff --git a/gcc/testsuite/gfortran.dg/cpp_non_traditional_5.F90 b/gcc/testsuite/gfortran.dg/cpp_non_traditional_5.F90
new file mode 100644
index 00000000000..13b515ac25a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cpp_non_traditional_5.F90
@@ -0,0 +1,18 @@
+! { dg-do preprocess }
+! { dg-options "-cpp -P" }
+!
+! Backwards-compatibility check. Without -fno-traditional-cpp the
+! preprocessor must stay in the traditional mode that gfortran has
+! always used. The output preserves the source byte-for-byte through
+! macro expansion, so "//" stays as written and "##" token pasting
+! is *not* available.
+
+#define CONCAT a // b
+
+program t
+ character(len=*), parameter :: x = CONCAT
+end program t
+
+! "//" must appear with no whitespace between the slashes, since
+! traditional mode does not retokenize.
+! { dg-final { scan-file "cpp_non_traditional_5.i" {a // b} } }
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index bc0d7714e96..96776b6a807 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -183,7 +183,8 @@ enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC11, CLK_GNUC17, CLK_GNUC23,
CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX11, CLK_CXX11,
CLK_GNUCXX14, CLK_CXX14, CLK_GNUCXX17, CLK_CXX17,
CLK_GNUCXX20, CLK_CXX20, CLK_GNUCXX23, CLK_CXX23,
- CLK_GNUCXX26, CLK_CXX26, CLK_ASM};
+ CLK_GNUCXX26, CLK_CXX26, CLK_ASM,
+ CLK_GNUF77, CLK_GNUF90};
/* Payload of a NUMBER, STRING, CHAR or COMMENT token. */
struct GTY(()) cpp_string {
diff --git a/libcpp/init.cc b/libcpp/init.cc
index a480d1d4a26..7994645222c 100644
--- a/libcpp/init.cc
+++ b/libcpp/init.cc
@@ -151,7 +151,9 @@ static const struct lang_flags lang_defaults[] = {
/* CXX23 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
/* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1 },
/* CXX26 */ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1 },
- /* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
+ /* ASM */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ /* GNUF77 */ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ /* GNUF90 */ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
};
/* Sets internal flags correctly for a given language. */
diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index 9652e7d5aae..fcbc593d731 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -3069,7 +3069,9 @@ save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
token->val.str.len = clen;
token->val.str.text = buffer;
- buffer[0] = '/';
+ /* Preserve the starting delimiter of the comment: '/' for C and C++ style
+ comments, '!' or one of the fixed-form column-1 markers for Fortran. */
+ buffer[0] = from[-1];
memcpy (buffer + 1, from, len - 1);
/* Finish conversion to a C comment, if necessary. */
@@ -3963,6 +3965,16 @@ _cpp_lex_direct (cpp_reader *pfile)
result->src_loc = linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (buffer, buffer->cur));
+ /* In fixed-form Fortran a 'C', 'c', '*', 'D' or 'd' in column 1 starts
+ a comment line. The other Fortran comment marker, '!', works for
+ both fixed and free form and is handled by case '!' below. */
+ if (CPP_OPTION (pfile, lang) == CLK_GNUF77
+ && ! pfile->state.in_directive
+ && CPP_BUF_COLUMN (buffer, buffer->cur - 1) == 0
+ && (c == 'c' || c == 'C' || c == '*'
+ || c == 'd' || c == 'D'))
+ goto fortran_line_comment;
+
switch (c)
{
case ' ': case '\t': case '\f': case '\v': case '\0':
@@ -4081,7 +4093,9 @@ _cpp_lex_direct (cpp_reader *pfile)
if (_cpp_skip_block_comment (pfile))
cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
}
- else if (c == '/' && ! CPP_OPTION (pfile, traditional))
+ else if (c == '/' && ! CPP_OPTION (pfile, traditional)
+ && CPP_OPTION (pfile, lang) != CLK_GNUF90
+ && CPP_OPTION (pfile, lang) != CLK_GNUF77)
{
/* Don't warn for system headers. */
if (_cpp_in_system_header (pfile))
@@ -4335,7 +4349,46 @@ _cpp_lex_direct (cpp_reader *pfile)
case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
- case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
+ case '!':
+ if (!((CPP_OPTION (pfile, lang) == CLK_GNUF90
+ || CPP_OPTION (pfile, lang) == CLK_GNUF77)
+ && ! pfile->state.in_directive))
+ {
+ /* '!' is the logical-not operator, as in "#if !FOO". */
+ IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT);
+ break;
+ }
+ /* Fall through. */
+
+ fortran_line_comment:
+ /* A Fortran line comment. */
+ comment_start = buffer->cur;
+
+ if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
+ cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
+
+ if (fallthrough_comment_p (pfile, comment_start))
+ fallthrough_comment = true;
+
+ if (pfile->cb.comment)
+ {
+ size_t len = pfile->buffer->cur - comment_start;
+ pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
+ len + 1);
+ }
+
+ if (!pfile->state.save_comments)
+ {
+ result->flags |= PREV_WHITE;
+ goto update_tokens_line;
+ }
+
+ if (fallthrough_comment)
+ result->flags |= PREV_FALLTHROUGH;
+
+ /* Save the comment as a token in its own right. */
+ save_comment (pfile, result, comment_start, c);
+ break;
case '^':
result->type = CPP_XOR;
if (*buffer->cur == '=')
--
2.54.0