Hi!

The following patch adds another extension, gnu::base64.
As mentioned in the documentation, this extension is primarily
intended for use by the preprocessor, so that for larger embeds (say 32+
or 64+ bytes long) it doesn't have to emit tens of thousands or
millions of comma separated integer literals, which would be very expensive
to parse again, but can instead emit
#embed "." 
__gnu__::__base64__("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuCg==")
with the meaning: don't actually load any file; instead, base64 decode
(RFC4648 with A-Za-z0-9+/ chars and = padding, no newlines in between)
the string and use the result as the data.  This was chosen because it
should be -pedantic-errors clean and fairly cheap to decode, and an
optimizing compiler could then handle it as a binary blob similar to a
normal #embed, while the data isn't left somewhere on the disk, so
distcc/ccache etc. can move the preprocessed source around without issues.
It makes no sense to support the limit and gnu::offset parameters together
with it IMHO; why would somebody go to the trouble of providing the full
data and then throw some of it away?  prefix/suffix/if_empty are supported
as usual, though they are not intended to be used by the preprocessor.

This patch adds just the extension side, not the actual emission of it
during -E or -E -fdirectives-only for now, because that will be far easier
to do once we introduce a token type and tree to handle the sequence
as a binary blob; once that is done, that new token type can be printed
as such an #embed (or a series of them, since I think we can't emit a
string longer than 2GB).

Right now the patch only supports a single huge string literal in there,
not concatenation of multiple strings; I'm not sure whether we should add
support for that so that we don't run into the line length limits for
column numbering.  The alternative would be to emit
#embed "." __gnu__::__base64__( \
"Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1" \
"aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOt" \
"c3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9z" \
"dGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVp" \
"YsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2Mg" \
"ZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjD" \
"qW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVl" \
"IG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDD" \
"qXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOp" \
"bnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFt" \
"IG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3Is" \
"IHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGks" \
"IGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBu" \
"ZWdlbnQu")
so effectively split it at 76-column boundaries (or some other width,
ideally a multiple of 4); the disadvantage is that we would then lex it
not into a single huge string but into perhaps hundreds/thousands/millions
of short CPP_STRINGs that would have to be gathered together.
Thoughts on that?

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-06-19  Jakub Jelinek  <ja...@redhat.com>

libcpp/
        * internal.h (struct cpp_embed_params): Add base64 member.
        * directives.cc (_cpp_parse_embed_params): Parse gnu::base64
        parameter.
        * files.cc (finish_embed, base64_dec_fn): New functions.
        (base64_dec): New array.
        (B64D0, B64D1, B64D2, B64D3): Define.
        (finish_base64_embed): New function.
        (_cpp_stack_embed): Use finish_embed.  Handle params->base64
        using finish_base64_embed.
gcc/
        * doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64
        parameter.
gcc/testsuite/
        * c-c++-common/cpp/embed-17.c: New test.
        * c-c++-common/cpp/embed-18.c: New test.

--- libcpp/internal.h.jj        2024-06-18 17:42:32.228010763 +0200
+++ libcpp/internal.h   2024-06-19 09:28:25.881760114 +0200
@@ -631,6 +631,7 @@ struct cpp_embed_params
   location_t loc;
   bool has_embed;
   cpp_num_part limit, offset;
+  const cpp_token *base64;
   cpp_embed_params_tokens prefix, suffix, if_empty;
 };
 
--- libcpp/directives.cc.jj     2024-06-18 17:42:32.000000000 +0200
+++ libcpp/directives.cc        2024-06-19 12:12:54.178141429 +0200
@@ -1032,12 +1032,22 @@ _cpp_parse_embed_params (cpp_reader *pfi
                  cpp_error (pfile, CPP_DL_ERROR, "expected ')'");
                  return false;
                }
-             return ret;
            }
-         else if (token->type == CPP_CLOSE_PAREN && params->has_embed)
-           return ret;
-         cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
-         return false;
+         else if (token->type != CPP_CLOSE_PAREN || !params->has_embed)
+           {
+             cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
+             return false;
+           }
+         if (params->base64 && (seen & ((1 << 0) | (1 << 5))) != 0)
+           {
+             ret = false;
+             if (!params->has_embed)
+               cpp_error_with_line (pfile, CPP_DL_ERROR,
+                                    params->base64->src_loc, 0,
+                                    "'gnu::base64' parameter conflicts with "
+                                    "'limit' or 'gnu::offset' parameters");
+           }
+         return ret;
        }
       param_name = NODE_NAME (token->val.node.spelling);
       param_name_len = NODE_LEN (token->val.node.spelling);
@@ -1113,7 +1123,7 @@ _cpp_parse_embed_params (cpp_reader *pfi
       else if (param_prefix_len == 3 && memcmp (param_prefix, "gnu", 3) == 0)
        {
          struct { int len; const char *name; } gnu_params[] = {
-           { 6, "offset" }
+           { 6, "base64" }, { 6, "offset" }
          };
          for (size_t i = 0;
               i < sizeof (gnu_params) / sizeof (gnu_params[0]); ++i)
@@ -1153,7 +1163,7 @@ _cpp_parse_embed_params (cpp_reader *pfi
       if (param_kind != (size_t) -1 && token->type != CPP_OPEN_PAREN)
        cpp_error_with_line (pfile, CPP_DL_ERROR, loc, 0,
                             "expected '('");
-      else if (param_kind == 0 || param_kind == 4)
+      else if (param_kind == 0 || param_kind == 5)
        {
          if (params->has_embed && pfile->op_stack == NULL)
            _cpp_expand_op_stack (pfile);
@@ -1170,6 +1180,26 @@ _cpp_parse_embed_params (cpp_reader *pfi
            }
          token = _cpp_get_token_no_padding (pfile);
        }
+      else if (param_kind == 4)
+       {
+         token = _cpp_get_token_no_padding (pfile);
+         if (token->type == CPP_STRING)
+           {
+             params->base64 = token;
+             token = _cpp_get_token_no_padding (pfile);
+             if (token->type != CPP_CLOSE_PAREN)
+               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+                                    "expected ')'");
+           }
+         else
+           {
+             cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+                                  "expected character string literal");
+             if (token->type != CPP_CLOSE_PAREN)
+               token = _cpp_get_token_no_padding (pfile);
+           }
+         token = _cpp_get_token_no_padding (pfile);
+       }
       else if (token->type == CPP_OPEN_PAREN)
        {
          cpp_embed_params_tokens *save = NULL;
--- libcpp/files.cc.jj  2024-06-18 17:42:32.229010750 +0200
+++ libcpp/files.cc     2024-06-19 12:44:40.169920948 +0200
@@ -1221,6 +1221,282 @@ cpp_probe_header_unit (cpp_reader *pfile
   return nullptr;
 }
 
+/* Helper for _cpp_stack_embed.  Turn FILE's data, clipped per PARAMS, into
+   tokens.  Return 0 on error, 1 if the data is non-empty, 2 if it is empty.  */
+
+static int
+finish_embed (cpp_reader *pfile, _cpp_file *file,
+             struct cpp_embed_params *params)
+{
+  const uchar *buffer = file->buffer;
+  size_t limit = file->limit;
+  if (params->offset - file->offset > limit)
+    limit = 0; /* Requested offset is past the available data.  */
+  else
+    {
+      buffer += params->offset - file->offset;
+      limit -= params->offset - file->offset;
+    }
+  if (params->limit < limit)
+    limit = params->limit;
+
+  /* For sizes larger than say 64 bytes, this is just a temporary
+     solution, we should emit a single new token which the FEs will
+     handle as an optimization.  */
+  size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
+  if (limit > max / 2
+      || (limit
+         ? (params->prefix.count > max
+            || params->suffix.count > max
+            || (limit * 2 + params->prefix.count
+                + params->suffix.count > max))
+         : params->if_empty.count > max))
+    {
+      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                   "%s is too large", file->path);
+      return 0;
+    }
+
+  size_t len = 0; /* Bytes needed for all number spellings plus NULs.  */
+  for (size_t i = 0; i < limit; ++i)
+    {
+      if (buffer[i] < 10)
+       len += 2;
+      else if (buffer[i] < 100)
+       len += 3;
+#if UCHAR_MAX == 255
+      else
+       len += 4;
+#else
+      else if (buffer[i] < 1000)
+       len += 4;
+      else
+       {
+         char buf[64];
+         len += sprintf (buf, "%d", buffer[i]) + 1;
+       }
+#endif
+      if (len > INTTYPE_MAXIMUM (ssize_t))
+       {
+         cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                       "%s is too large", file->path);
+         return 0;
+       }
+    }
+  uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
+  _cpp_buff *tok_buff = NULL;
+  cpp_token *toks = NULL, *tok = &pfile->directive_result;
+  size_t count = 0; /* Tokens beyond the one in directive_result.  */
+  if (limit)
+    count = (params->prefix.count + limit * 2 - 1
+            + params->suffix.count) - 1;
+  else if (params->if_empty.count)
+    count = params->if_empty.count - 1;
+  if (count)
+    {
+      tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
+      toks = (cpp_token *) tok_buff->base;
+    }
+  cpp_embed_params_tokens *prefix
+    = limit ? &params->prefix : &params->if_empty;
+  if (prefix->count)
+    {
+      *tok = *prefix->base_run.base;
+      tok = toks;
+      tokenrun *cur_run = &prefix->base_run;
+      while (cur_run)
+       {
+         size_t cnt = (cur_run->next ? cur_run->limit
+                       : prefix->cur_token) - cur_run->base;
+         cpp_token *t = cur_run->base;
+         if (cur_run == &prefix->base_run)
+           {
+             t++; /* The first token was already stored via *tok above.  */
+             cnt--;
+           }
+         memcpy (tok, t, cnt * sizeof (cpp_token));
+         tok += cnt;
+         cur_run = cur_run->next;
+       }
+    }
+  for (size_t i = 0; i < limit; ++i)
+    {
+      tok->src_loc = params->loc;
+      tok->type = CPP_NUMBER;
+      tok->flags = NO_EXPAND;
+      if (i == 0)
+       tok->flags |= PREV_WHITE;
+      tok->val.str.text = s;
+      tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
+      s += tok->val.str.len + 1; /* Step over the terminating NUL too.  */
+      if (tok == &pfile->directive_result)
+       tok = toks;
+      else
+       tok++;
+      if (i < limit - 1) /* Comma after every number but the last.  */
+       {
+         tok->src_loc = params->loc;
+         tok->type = CPP_COMMA;
+         tok->flags = NO_EXPAND;
+         tok++;
+       }
+    }
+  if (limit && params->suffix.count)
+    {
+      tokenrun *cur_run = &params->suffix.base_run;
+      cpp_token *orig_tok = tok;
+      while (cur_run)
+       {
+         size_t cnt = (cur_run->next ? cur_run->limit
+                       : params->suffix.cur_token) - cur_run->base;
+         cpp_token *t = cur_run->base;
+         memcpy (tok, t, cnt * sizeof (cpp_token));
+         tok += cnt;
+         cur_run = cur_run->next;
+       }
+      orig_tok->flags |= PREV_WHITE;
+    }
+  pfile->directive_result.flags |= PREV_WHITE;
+  if (count)
+    {
+      _cpp_push_token_context (pfile, NULL, toks, count);
+      pfile->context->buff = tok_buff;
+    }
+  return limit ? 1 : 2;
+}
+
+/* Helper for initializing the base64_dec table: return the 6-bit base64
+   value of C (A-Z, a-z, 0-9, '+', '/' map to 0-63) or -1 otherwise.  Can't
+   rely on ASCII compatibility, so check each letter separately.  */
+
+constexpr signed char
+base64_dec_fn (unsigned char c)
+{
+  return (c == 'A' ? 0 : c == 'B' ? 1 : c == 'C' ? 2 : c == 'D' ? 3
+         : c == 'E' ? 4 : c == 'F' ? 5 : c == 'G' ? 6 : c == 'H' ? 7
+         : c == 'I' ? 8 : c == 'J' ? 9 : c == 'K' ? 10 : c == 'L' ? 11
+         : c == 'M' ? 12 : c == 'N' ? 13 : c == 'O' ? 14 : c == 'P' ? 15
+         : c == 'Q' ? 16 : c == 'R' ? 17 : c == 'S' ? 18 : c == 'T' ? 19
+         : c == 'U' ? 20 : c == 'V' ? 21 : c == 'W' ? 22 : c == 'X' ? 23
+         : c == 'Y' ? 24 : c == 'Z' ? 25
+         : c == 'a' ? 26 : c == 'b' ? 27 : c == 'c' ? 28 : c == 'd' ? 29
+         : c == 'e' ? 30 : c == 'f' ? 31 : c == 'g' ? 32 : c == 'h' ? 33
+         : c == 'i' ? 34 : c == 'j' ? 35 : c == 'k' ? 36 : c == 'l' ? 37
+         : c == 'm' ? 38 : c == 'n' ? 39 : c == 'o' ? 40 : c == 'p' ? 41
+         : c == 'q' ? 42 : c == 'r' ? 43 : c == 's' ? 44 : c == 't' ? 45
+         : c == 'u' ? 46 : c == 'v' ? 47 : c == 'w' ? 48 : c == 'x' ? 49
+         : c == 'y' ? 50 : c == 'z' ? 51
+         : c == '0' ? 52 : c == '1' ? 53 : c == '2' ? 54 : c == '3' ? 55
+         : c == '4' ? 56 : c == '5' ? 57 : c == '6' ? 58 : c == '7' ? 59
+         : c == '8' ? 60 : c == '9' ? 61 : c == '+' ? 62 : c == '/' ? 63
+         : -1);
+}
+
+/* base64 decoding table indexed by character code 0-255; -1 marks invalid.  */
+
+static constexpr signed char base64_dec[] = {
+#define B64D0(x) base64_dec_fn (x)
+#define B64D1(x) B64D0 (x), B64D0 (x + 1), B64D0 (x + 2), B64D0 (x + 3)
+#define B64D2(x) B64D1 (x), B64D1 (x + 4), B64D1 (x + 8), B64D1 (x + 12)
+#define B64D3(x) B64D2 (x), B64D2 (x + 16), B64D2 (x + 32), B64D2 (x + 48)
+  B64D3 (0), B64D3 (64), B64D3 (128), B64D3 (192)
+};
+
+/* Helper for _cpp_stack_embed.  Handle #embed/__has_embed with gnu::base64
+   parameter.  Return 0 on error, otherwise 1 (data) or 2 (empty data).  */
+
+static int
+finish_base64_embed (cpp_reader *pfile, const char *fname, bool angle,
+                    struct cpp_embed_params *params)
+{
+  size_t len, end, i, base64_len;
+  uchar *buf = NULL, *q, qbuf[3];
+  const uchar *p, *base64_str;
+  if (angle || strcmp (fname, ".")) /* Only "." is accepted as filename.  */
+    {
+      if (!params->has_embed)
+       cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                     "'gnu::base64' parameter can be only used with \".\"");
+      return 0;
+    }
+  if (params->base64->val.str.len < 2
+      || params->base64->val.str.text[0] != '"'
+      || params->base64->val.str.text[params->base64->val.str.len - 1] != '"')
+    {
+    fail:
+      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                   "'gnu::base64' argument not valid base64 encoded string");
+      free (buf);
+      return 0;
+    }
+  base64_str = params->base64->val.str.text + 1;
+  base64_len = params->base64->val.str.len  - 2;
+  if ((base64_len & 3) != 0) /* Length must be a multiple of 4.  */
+    goto fail;
+  len = base64_len / 4 * 3;
+  end = len; /* Number of bytes decoded from full 4-character groups.  */
+  if (base64_len && base64_str[base64_len - 1] == '=')
+    {
+      end = len - 3;
+      --len;
+      if (base64_str[base64_len - 2] == '=')
+       --len;
+    }
+
+  if (params->has_embed)
+    q = qbuf; /* Only validate; decoded bytes are overwritten each round.  */
+  else
+    {
+      buf = XNEWVEC (uchar, len ? len : 1);
+      q = buf;
+    }
+  for (i = 0, p = base64_str; i < end; i += 3, p += 4)
+    {
+      int a = base64_dec[p[0]];
+      int b = base64_dec[p[1]];
+      int c = base64_dec[p[2]];
+      int d = base64_dec[p[3]];
+      if (a == -1 || b == -1 || c == -1 || d == -1)
+       goto fail;
+      q[0] = (a << 2) | (b >> 4);
+      q[1] = (b << 4) | (c >> 2);
+      q[2] = (c << 6) | d;
+      if (!params->has_embed)
+       q += 3;
+    }
+  if (len != end) /* Final group ends in one or two '=' padding chars.  */
+    {
+      int a = base64_dec[p[0]];
+      int b = base64_dec[p[1]];
+      if (a == -1 || b == -1)
+       goto fail;
+      q[0] = (a << 2) | (b >> 4);
+      if (len - end == 2)
+       {
+         int c = base64_dec[p[2]];
+         if (c == -1)
+           goto fail;
+         q[1] = (b << 4) | (c >> 2);
+         if ((c & 3) != 0) /* Bits not part of the data must be zero.  */
+           goto fail;
+       }
+      else if ((b & 15) != 0) /* Bits not part of the data must be zero.  */
+       goto fail;
+    }
+  if (params->has_embed)
+    return len ? 1 : 2;
+  _cpp_file *file = make_cpp_file (NULL, "");
+  file->embed = 1;
+  file->next_file = pfile->all_files;
+  pfile->all_files = file;
+  params->limit = -1;
+  params->offset = 0;
+  file->limit = len;
+  file->buffer = buf;
+  file->path = xstrdup ("<base64>");
+  return finish_embed (pfile, file, params);
+}
+
 /* Try to load FNAME with #embed/__has_embed parameters PARAMS.
    If !PARAMS->has_embed, return new token in pfile->directive_result
    (first token) and rest in a pushed non-macro context.
@@ -1231,6 +1507,8 @@ int
 _cpp_stack_embed (cpp_reader *pfile, const char *fname, bool angle,
                  struct cpp_embed_params *params)
 {
+  if (params->base64)
+    return finish_base64_embed (pfile, fname, angle, params);
   cpp_dir *dir = search_path_head (pfile, fname, angle, IT_EMBED,
                                   params->has_embed);
   if (!dir)
@@ -1450,141 +1728,7 @@ _cpp_stack_embed (cpp_reader *pfile, con
       return limit && params->limit ? 1 : 2;
     }
 
-  const uchar *buffer = file->buffer;
-  size_t limit = file->limit;
-  if (params->offset - file->offset > limit)
-    limit = 0;
-  else
-    {
-      buffer += params->offset - file->offset;
-      limit -= params->offset - file->offset;
-    }
-  if (params->limit < limit)
-    limit = params->limit;
-
-  /* For sizes larger than say 64 bytes, this is just a temporary
-     solution, we should emit a single new token which the FEs will
-     handle as an optimization.  */
-  size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
-  if (limit > max / 2
-      || (limit
-         ? (params->prefix.count > max
-            || params->suffix.count > max
-            || (limit * 2 + params->prefix.count
-                + params->suffix.count > max))
-         : params->if_empty.count > max))
-    {
-      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
-                   "%s is too large", file->path);
-      return 0;
-    }
-
-  size_t len = 0;
-  for (size_t i = 0; i < limit; ++i)
-    {
-      if (buffer[i] < 10)
-       len += 2;
-      else if (buffer[i] < 100)
-       len += 3;
-#if UCHAR_MAX == 255
-      else
-       len += 4;
-#else
-      else if (buffer[i] < 1000)
-       len += 4;
-      else
-       {
-         char buf[64];
-         len += sprintf (buf, "%d", buffer[i]) + 1;
-       }
-#endif
-      if (len > INTTYPE_MAXIMUM (ssize_t))
-       {
-         cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
-                       "%s is too large", file->path);
-         return 0;
-       }
-    }
-  uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
-  _cpp_buff *tok_buff = NULL;
-  cpp_token *toks = NULL, *tok = &pfile->directive_result;
-  size_t count = 0;
-  if (limit)
-    count = (params->prefix.count + limit * 2 - 1
-            + params->suffix.count) - 1;
-  else if (params->if_empty.count)
-    count = params->if_empty.count - 1;
-  if (count)
-    {
-      tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
-      toks = (cpp_token *) tok_buff->base;
-    }
-  cpp_embed_params_tokens *prefix
-    = limit ? &params->prefix : &params->if_empty;
-  if (prefix->count)
-    {
-      *tok = *prefix->base_run.base;
-      tok = toks;
-      tokenrun *cur_run = &prefix->base_run;
-      while (cur_run)
-       {
-         size_t cnt = (cur_run->next ? cur_run->limit
-                       : prefix->cur_token) - cur_run->base;
-         cpp_token *t = cur_run->base;
-         if (cur_run == &prefix->base_run)
-           {
-             t++;
-             cnt--;
-           }
-         memcpy (tok, t, cnt * sizeof (cpp_token));
-         tok += cnt;
-         cur_run = cur_run->next;
-       }
-    }
-  for (size_t i = 0; i < limit; ++i)
-    {
-      tok->src_loc = params->loc;
-      tok->type = CPP_NUMBER;
-      tok->flags = NO_EXPAND;
-      if (i == 0)
-       tok->flags |= PREV_WHITE;
-      tok->val.str.text = s;
-      tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
-      s += tok->val.str.len + 1;
-      if (tok == &pfile->directive_result)
-       tok = toks;
-      else
-       tok++;
-      if (i < limit - 1)
-       {
-         tok->src_loc = params->loc;
-         tok->type = CPP_COMMA;
-         tok->flags = NO_EXPAND;
-         tok++;
-       }
-    }
-  if (limit && params->suffix.count)
-    {
-      tokenrun *cur_run = &params->suffix.base_run;
-      cpp_token *orig_tok = tok;
-      while (cur_run)
-       {
-         size_t cnt = (cur_run->next ? cur_run->limit
-                       : params->suffix.cur_token) - cur_run->base;
-         cpp_token *t = cur_run->base;
-         memcpy (tok, t, cnt * sizeof (cpp_token));
-         tok += cnt;
-         cur_run = cur_run->next;
-       }
-      orig_tok->flags |= PREV_WHITE;
-    }
-  pfile->directive_result.flags |= PREV_WHITE;
-  if (count)
-    {
-      _cpp_push_token_context (pfile, NULL, toks, count);
-      pfile->context->buff = tok_buff;
-    }
-  return limit ? 1 : 2;
+  return finish_embed (pfile, file, params);
 }
 
 /* Retrofit the just-entered main file asif it was an include.  This
--- gcc/doc/cpp.texi.jj 2024-06-18 18:04:17.384265373 +0200
+++ gcc/doc/cpp.texi    2024-06-19 13:21:47.549613348 +0200
@@ -3967,7 +3967,8 @@ with currently supported standard parame
 @code{suffix} and @code{if_empty}, or implementation defined parameters
 specified by a unique vendor prefix followed by @code{::} followed by
 name of the parameter.  GCC uses the @code{gnu} prefix for vendor
-parameters and currently supports the @code{gnu::offset} parameter.
+parameters and currently supports the @code{gnu::offset} and
+@code{gnu::base64} parameters.
 
 The @code{limit} parameter argument is a constant expression which
 specifies the maximum number of bytes included by the directive,
@@ -3981,6 +3982,17 @@ The @code{gnu::offset} parameter argumen
 which specifies how many bytes to skip from the start of the resource.
 @code{limit} is then counted from that position.
 
+The @code{gnu::base64} parameter argument is a single character string
+literal with base64 encoded data.  See
+@uref{https://datatracker.ietf.org/doc/html/rfc4648#section-4}.  There
+should be no new-lines in the string literal and because this parameter
+is meant primarily for use by the preprocessor itself, there is no support
+for string literal concatenation in the argument.  If the @code{gnu::base64}
+parameter is specified, the @code{limit} and @code{gnu::offset} parameters
+should not be specified and the filename should always be @code{"."}.
+Instead of reading a file the directive will decode the base64 encoded
+data and use that as the data to include.
+
 The @code{#embed} directive is not supported in the Traditional Mode
 (@pxref{Traditional Mode}).
 
--- gcc/testsuite/c-c++-common/cpp/embed-17.c.jj        2024-06-19 
11:34:28.587675536 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-17.c   2024-06-19 12:48:52.176725071 
+0200
@@ -0,0 +1,98 @@
+/* { dg-do run } */
+/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+
+#if __has_embed ("." gnu::base64 ("")) != __STDC_EMBED_EMPTY__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SA==")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." prefix(-) suffix (-) if_empty (-) __gnu__::__base64__ 
("SA==")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::__base64__ ("SGU=")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::__base64__ ("SGVs")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." __gnu__::base64 ("SGVsbG8=")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+/* M. Tulli Ciceronis De Finibus Bonorum et Malorum.  Liber Primus.  */
+/* echo "Tm9u....bnQu" | fmt -s -w 76 | base64 -d to decode.  */
+#define BONORUM_ET_MALORUM \
+"Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOtc3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9zdGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVpYsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2MgZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjDqW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVlIG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDDqXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOpbnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFtIG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3IsIHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGksIGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBuZWdlbnQu"
+#if __has_embed ("." gnu::base64 (BONORUM_ET_MALORUM)) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("foo" gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed (<foo> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed (<.> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SGU=") limit(5)) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SGU=") gnu::offset(2)) != 
__STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#embed "." gnu::base64 ("") if_empty (int a = 42;) prefix(+ +) suffix (+ +)
+#embed "." __gnu__::__base64__ ("SA==") prefix (int b = ) suffix (;) if_empty 
(+ +)
+const unsigned char c[] = {
+  #embed "." gnu::base64("SGU=")
+};
+const unsigned char d[] = {
+  #embed "." gnu::base64 ("SGVs")
+};
+const unsigned char e[] = {
+  #embed "." gnu::base64 ("SGVsbG8=")
+};
+const unsigned char f[] = {
+#ifdef __cplusplus
+  #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix (' ', )
+#else
+  #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix ([1] = ) suffix(, [0] = ' 
')
+#endif
+};
+
+#ifdef __cplusplus
+#define C "C"
+#else
+#define C
+#endif
+extern C void abort (void);
+extern C int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+int
+main ()
+{
+  if (a != 42 || b != 'H') /* if_empty and prefix/suffix expansions.  */
+    abort ();
+  if (sizeof (c) != 2 || c[0] != 'H' || c[1] != 'e') /* "SGU=".  */
+    abort ();
+  if (sizeof (d) != 3 || d[0] != 'H' || d[1] != 'e' || d[2] != 'l')
+    abort ();
+  if (sizeof (e) != 5 || memcmp (e, "Hello", 5)) /* "SGVsbG8=".  */
+    abort ();
+  if (sizeof (f) != 1 + 747 || memcmp (f, " Non eram néscius, Brute",
+                                      sizeof (" Non eram néscius, Brute") - 1))
+    abort ();
+}
--- gcc/testsuite/c-c++-common/cpp/embed-18.c.jj        2024-06-19 
12:49:59.016876487 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-18.c   2024-06-19 13:12:48.552463081 
+0200
@@ -0,0 +1,46 @@
+/* { dg-do preprocess } */
+/* { dg-options "" } */
+
+#embed "." gnu::base64("") __gnu__::__base64__("") /* { dg-error "duplicate 
embed parameter 'gnu::base64'" } */
+#embed __FILE__ gnu::base64 prefix() suffix() /* { dg-error "expected '\\\('" 
} */
+#embed __FILE__ gnu::base64(1) prefix() suffix() /* { dg-error "expected 
character string literal" } */
+#embed __FILE__ gnu::base64() prefix() suffix() /* { dg-error "expected 
character string literal" } */
+#embed "." prefix() suffix() gnu::base64("" /* { dg-error "expected '\\\)'" } 
*/
+#embed "." gnu::base64("a") /* { dg-error "'gnu::base64' argument not valid 
base64 encoded string" } */
+#embed "." gnu::base64("----") /* { dg-error "'gnu::base64' argument not valid 
base64 encoded string" } */
+#embed "." gnu::base64("a===") /* { dg-error "'gnu::base64' argument not valid 
base64 encoded string" } */
+#embed "." 
gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg==")
 /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+/* No support for string concatenation in gnu::base64 argument.  */
+#embed "." 
gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg"
 \
+"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" 
\
+"YSBhbGlxdWEuCg==")
+/* { dg-error "expected '\\\)'" "" { target *-*-* } .-2 } */
+/* { dg-error "expected parameter name" "" { target *-*-* } .-2 } */
+#embed "embed-18.c" gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter 
can be only used with \\\".\\\"" } */
+#embed <embed-18.c> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter 
can be only used with \\\".\\\"" } */
+#embed <.> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be 
only used with \\\".\\\"" } */
+#embed "." gnu::base64("SA==") limit(3) /* { dg-error "'gnu::base64' parameter 
conflicts with 'limit' or 'gnu::offset' parameters" } */
+#embed "." gnu::base64("SA==") gnu::offset(1) /* { dg-error "'gnu::base64' 
parameter conflicts with 'limit' or 'gnu::offset' parameters" } */
+#if 1 + __has_embed ("." gnu::base64("") __gnu__::__base64__("")) /* { 
dg-error "duplicate embed parameter 'gnu::base64'" } */
+#endif
+#if 1 + __has_embed (__FILE__ __gnu__::__base64__ prefix() suffix()) /* { 
dg-error "expected '\\\('" } */
+#endif
+#if 1 + __has_embed (__FILE__ __gnu__::__base64__(1) prefix() suffix()) /* { 
dg-error "expected character string literal" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::base64() prefix() suffix()) /* { dg-error 
"expected character string literal" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("a")) /* { dg-error "'gnu::base64' 
argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("----")) /* { dg-error "'gnu::base64' 
argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("a===")) /* { dg-error "'gnu::base64' 
argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." 
gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg=="))
 /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." 
gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg"
 \
+"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" 
\
+"YSBhbGlxdWEuCg=="))
+/* { dg-error "expected '\\\)'" "" { target *-*-* } .-2 } */
+/* { dg-error "expected parameter name" "" { target *-*-* } .-2 } */
+/* { dg-error "missing '\\\(' in expression" "" { target *-*-* } .-3 } */
+#endif

        Jakub

Reply via email to