[PATCH] D87561: [Sema] List conversion validate character array

2020-10-03 Thread Mark de Wever via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Mordante marked 2 inline comments as done.
Closed by commit rG0ce6d6b46eb7: [Sema] List conversion validate character 
array. (authored by Mordante).

Changed prior to commit:
  https://reviews.llvm.org/D87561?vs=294556=295974#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaInit.cpp
  clang/lib/Sema/SemaOverload.cpp
  clang/test/CXX/drs/dr14xx.cpp
  clang/test/SemaObjCXX/overload.mm

Index: clang/test/SemaObjCXX/overload.mm
===
--- clang/test/SemaObjCXX/overload.mm
+++ clang/test/SemaObjCXX/overload.mm
@@ -201,3 +201,17 @@
 }
 
 }
+
+namespace StringLiterals {
+void f(const char(&&)[5]);
+void f(const wchar_t(&&)[5]);
+void f(const char16_t(&&)[5]);
+void f(const char32_t(&&)[5]);
+void g() {
+  f({"abc"});
+  f({(((@encode(int});
+  f({L"abc"});
+  f({uR"(abc)"});
+  f({(UR"(abc)")});
+}
+} // namespace StringLiterals
Index: clang/test/CXX/drs/dr14xx.cpp
===
--- clang/test/CXX/drs/dr14xx.cpp
+++ clang/test/CXX/drs/dr14xx.cpp
@@ -334,6 +334,22 @@
 
 X x;
 X x2{x};
+
+void f1(int);  // expected-note {{candidate function}}
+void f1(std::initializer_list) = delete; // expected-note {{candidate function has been explicitly deleted}}
+void g1() { f1({42}); }// expected-error {{call to deleted function 'f1'}}
+
+template 
+struct Pair {
+  Pair(T, U);
+};
+struct String {
+  String(const char *);
+};
+
+void f2(Pair);   // expected-note {{candidate function}}
+void f2(std::initializer_list) = delete; // expected-note {{candidate function has been explicitly deleted}}
+void g2() { f2({"foo", "bar"}); }// expected-error {{call to deleted function 'f2'}}
   } // dr_example
 
   namespace nonaggregate {
@@ -379,6 +395,46 @@
 struct Value { Value(Pair); Value(TwoPairs); };
 void f() { Value{{{1,2},{3,4}}}; }
   }
+  namespace NonAmbiguous {
+  // The original implementation made this case ambigious due to the special
+  // handling of one element initialization lists.
+  void f(int(&&)[1]);
+  void f(unsigned(&&)[1]);
+
+  void g(unsigned i) {
+f({i});
+  }
+  } // namespace NonAmbiguous
+
+#if __cplusplus >= 201103L
+  namespace StringLiterals {
+  // When the array size is 4 the call will attempt to bind an lvalue to an
+  // rvalue and fail. Therefore #2 will be called. (rsmith will bring this
+  // issue to CWG)
+  void f(const char(&&)[4]);  // expected-note 5 {{no known conversion}}
+  void f(const char(&&)[5]) = delete; // expected-note 2 {{candidate function has been explicitly deleted}} expected-note 3 {{no known conversion}}
+  void f(const wchar_t(&&)[4]);   // expected-note 5 {{no known conversion}}
+  void f(const wchar_t(&&)[5]) = delete;  // expected-note {{candidate function has been explicitly deleted}} expected-note 4 {{no known conversion}}
+#if __cplusplus >= 202002L
+  void f2(const char8_t(&&)[4]);  // expected-note {{no known conversion}}
+  void f2(const char8_t(&&)[5]) = delete; // expected-note {{candidate function has been explicitly deleted}}
+#endif
+  void f(const char16_t(&&)[4]);  // expected-note 5 {{no known conversion}}
+  void f(const char16_t(&&)[5]) = delete; // expected-note {{candidate function has been explicitly deleted}} expected-note 4 {{no known conversion}}
+  void f(const char32_t(&&)[4]);  // expected-note 5 {{no known conversion}}
+  void f(const char32_t(&&)[5]) = delete; // expected-note {{candidate function has been explicitly deleted}} expected-note 4 {{no known conversion}}
+  void g() {
+f({"abc"});   // expected-error {{call to deleted function 'f'}}
+f({((("abc")))}); // expected-error {{call to deleted function 'f'}}
+f({L"abc"});  // expected-error {{call to deleted function 'f'}}
+#if __cplusplus >= 202002L
+f2({u8"abc"});// expected-error {{call to deleted function 'f2'}}
+#endif
+f({uR"(abc)"});   // expected-error {{call to deleted function 'f'}}
+f({(UR"(abc)")}); // expected-error {{call to deleted function 'f'}}
+  }
+  } // namespace StringLiterals
+#endif
 } // dr1467
 
 namespace dr1490 {  // dr1490: 3.7 c++11
Index: clang/lib/Sema/SemaOverload.cpp
===
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -4984,18 +4984,19 @@
  InOverloadResolution,
  AllowObjCWritebackConversion);
 }
-// FIXME: Check the other conditions here: array of character type,
-// initializer is a string literal.
-if 

[PATCH] D87561: [Sema] List conversion validate character array

2020-10-03 Thread Mark de Wever via Phabricator via cfe-commits
Mordante marked 6 inline comments as done.
Mordante added inline comments.



Comment at: clang/test/CXX/drs/dr14xx.cpp:411-414
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);

rsmith wrote:
> Mordante wrote:
> > rsmith wrote:
> > > rsmith wrote:
> > > > Mordante wrote:
> > > > > rsmith wrote:
> > > > > > These should presumably be references to arrays, rather than 
> > > > > > arrays, or the parameter type is as if you wrote (for example) 
> > > > > > `void f(const char *)`, which shouldn't get the special treatment 
> > > > > > here.
> > > > > > 
> > > > > > [over.ics.list]p4 mentions this in its footnote:
> > > > > > 
> > > > > > "Otherwise, if the parameter type is a character array [Footnote: 
> > > > > > Since there are no parameters of array type, this will only occur 
> > > > > > as the referenced type of a reference parameter.] and the 
> > > > > > initializer list has a single element that is an 
> > > > > > appropriately-typed string-literal (9.4.3), the implicit conversion 
> > > > > > sequence is the identity conversion."
> > > > > Ah I missed that footnote. I reread the standard and can you confirm 
> > > > > some cases?
> > > > > ```
> > > > > namespace A { 
> > > > >   void f(const char(&)[4]);
> > > > >   void g() { f({"abc"}); }
> > > > > }
> > > > > namespace B { 
> > > > >   void f(const char(&&)[4]);
> > > > >   void g() { f({"abc"}); }
> > > > > } 
> > > > > ```
> > > > > both should work and with P0388 the array without bounds will also 
> > > > > work.
> > > > > 
> > > > > I ask since this is my interpretation of the standard, but it seems 
> > > > > there's a difference between implementations and `void f(const 
> > > > > char(&&)[4]);` fails for "abc" but works for "ab".
> > > > > It seems ICC and consider "abc" an lvalue in this case and not when 
> > > > > using "ab".
> > > > > 
> > > > > Here's a gotbolt link with the examples https://godbolt.org/z/r1TKfx
> > > > That's a really interesting example :)
> > > > 
> > > > The initializer is a list containing an lvalue of type `const char[4]`. 
> > > > Per [dcl.init.list]/3.9 and /3.10, the behavior depends on whether the 
> > > > referenced type is reference-related to `const char[4]` -- if so, then 
> > > > the reference can only bind directly and a `&&` reference will be 
> > > > invalid, because it's binding an rvalue reference to an lvalue, and if 
> > > > not, then we create an array temporary and the `&&` binding is fine.
> > > > 
> > > > Per [dcl.init.ref]/4, `const char[???]` is reference-related to `const 
> > > > char[4]` if they are similar types, and per [conv.qual]/2, the types 
> > > > are similar if `???` is omitted or `4`, and not similar otherwise.
> > > > 
> > > > So:
> > > > * `const char (&)[4] = {"abc"}` is ill-formed (types are the same, 
> > > > binds rvalue reference to lvalue)
> > > > * `const char (&&)[] = {"abc"}` is ill-formed (types are similar, binds 
> > > > rvalue reference to lvalue)
> > > > * `const char (&)[5] = {"abc"}` is OK (types are not similar, creates 
> > > > temporary)
> > > > 
> > > > That seems like a very surprising outcome to me. Perhaps we should 
> > > > probably ignore array bounds entirely when determining whether two 
> > > > types are reference-related. I'll take this to CWG for discussion.
> > > I think that's only an answer to half of your question. The other half is 
> > > that [over.ics.list]p4 does not apply (directly) to either of your 
> > > testcases, because the "parameter type" is a reference type, not an array 
> > > type. For:
> > > 
> > > ```
> > > namespace A { 
> > >   void f(const char(&)[4]);
> > >   void g() { f({"abc"}); }
> > > }
> > > ```
> > > 
> > > ... we reach [over.ics.list]p9, which says to use the rules in 
> > > [over.ics.ref]. Those rules say that if the reference binds directly to 
> > > an argument expression (ignore the "expression" here; this is very old 
> > > wording that predates braced initializers), then we form an identity 
> > > conversion. So that's what happens in this case.
> > > 
> > > For
> > > 
> > > ```
> > > namespace B { 
> > >   void f(const char(&&)[4]);
> > >   void g() { f({"abc"}); }
> > > }
> > > ```
> > > 
> > > the same thing happens, but now [over.ics.ref]p3 says "an implicit 
> > > conversion sequence cannot be formed if it requires binding an lvalue 
> > > reference other than a reference to a non-volatile const type to an 
> > > rvalue or binding an rvalue reference to an lvalue other than a function 
> > > lvalue", so the candidate is not viable.
> > > 
> > > If the array bound were omitted or were not 4, then the reference would 
> > > not bind directly, and we would instead consider initializing a 
> > > temporary. [over.ics.ref]p2 says "When a parameter of reference type is 
> > > not bound directly to an argument expression, the conversion sequence is 
> > > the one required to convert the argument expression to 

[PATCH] D87561: [Sema] List conversion validate character array

2020-09-27 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith accepted this revision.
rsmith added a comment.
This revision is now accepted and ready to land.

Thanks, only nits here. Please feel free to submit after addressing them, or 
request another round of review if you prefer.




Comment at: clang/lib/Sema/SemaInit.cpp:3115-3118
+bool Sema::IsStringInit(Expr *Init, const ArrayType *AT) {
+  return ::IsStringInit(Init, AT, Context) == SIF_None;
+}
+

Consider moving this up to around line 144 (just after the definition of 
`::IsStringInit`).



Comment at: clang/lib/Sema/SemaOverload.cpp:4988
+
+if (const auto *AT = S.Context.getAsArrayType(ToType))
+  if (S.IsStringInit(From->getInit(0), AT)) {

Nit: the `if` body is long, please add braces.



Comment at: clang/test/CXX/drs/dr14xx.cpp:338-340
+void f1(int);
+void f1(std::initializer_list);
+void g1() { f1({42}); }

It would be useful to also test that the right overload is used here, and for 
`g2`.



Comment at: clang/test/CXX/drs/dr14xx.cpp:424-431
+f({"abc"});
+f({((("abc")))});
+f({L"abc"});
+#if __cplusplus >= 202002L
+f({u8"abc"});
+#endif
+f({uR"(abc)"});

Please test that the correct overload is called in each of these cases 
(especially considering the unintuitive outcome of these calls).



Comment at: clang/test/CXX/drs/dr14xx.cpp:411-414
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);

Mordante wrote:
> rsmith wrote:
> > rsmith wrote:
> > > Mordante wrote:
> > > > rsmith wrote:
> > > > > These should presumably be references to arrays, rather than arrays, 
> > > > > or the parameter type is as if you wrote (for example) `void f(const 
> > > > > char *)`, which shouldn't get the special treatment here.
> > > > > 
> > > > > [over.ics.list]p4 mentions this in its footnote:
> > > > > 
> > > > > "Otherwise, if the parameter type is a character array [Footnote: 
> > > > > Since there are no parameters of array type, this will only occur as 
> > > > > the referenced type of a reference parameter.] and the initializer 
> > > > > list has a single element that is an appropriately-typed 
> > > > > string-literal (9.4.3), the implicit conversion sequence is the 
> > > > > identity conversion."
> > > > Ah I missed that footnote. I reread the standard and can you confirm 
> > > > some cases?
> > > > ```
> > > > namespace A { 
> > > >   void f(const char(&)[4]);
> > > >   void g() { f({"abc"}); }
> > > > }
> > > > namespace B { 
> > > >   void f(const char(&&)[4]);
> > > >   void g() { f({"abc"}); }
> > > > } 
> > > > ```
> > > > both should work and with P0388 the array without bounds will also work.
> > > > 
> > > > I ask since this is my interpretation of the standard, but it seems 
> > > > there's a difference between implementations and `void f(const 
> > > > char(&&)[4]);` fails for "abc" but works for "ab".
> > > > It seems ICC and consider "abc" an lvalue in this case and not when 
> > > > using "ab".
> > > > 
> > > > Here's a gotbolt link with the examples https://godbolt.org/z/r1TKfx
> > > That's a really interesting example :)
> > > 
> > > The initializer is a list containing an lvalue of type `const char[4]`. 
> > > Per [dcl.init.list]/3.9 and /3.10, the behavior depends on whether the 
> > > referenced type is reference-related to `const char[4]` -- if so, then 
> > > the reference can only bind directly and a `&&` reference will be 
> > > invalid, because it's binding an rvalue reference to an lvalue, and if 
> > > not, then we create an array temporary and the `&&` binding is fine.
> > > 
> > > Per [dcl.init.ref]/4, `const char[???]` is reference-related to `const 
> > > char[4]` if they are similar types, and per [conv.qual]/2, the types are 
> > > similar if `???` is omitted or `4`, and not similar otherwise.
> > > 
> > > So:
> > > * `const char (&)[4] = {"abc"}` is ill-formed (types are the same, 
> > > binds rvalue reference to lvalue)
> > > * `const char (&&)[] = {"abc"}` is ill-formed (types are similar, binds 
> > > rvalue reference to lvalue)
> > > * `const char (&)[5] = {"abc"}` is OK (types are not similar, creates 
> > > temporary)
> > > 
> > > That seems like a very surprising outcome to me. Perhaps we should 
> > > probably ignore array bounds entirely when determining whether two types 
> > > are reference-related. I'll take this to CWG for discussion.
> > I think that's only an answer to half of your question. The other half is 
> > that [over.ics.list]p4 does not apply (directly) to either of your 
> > testcases, because the "parameter type" is a reference type, not an array 
> > type. For:
> > 
> > ```
> > namespace A { 
> >   void f(const char(&)[4]);
> >   void g() { f({"abc"}); }
> > }
> > ```
> > 
> > ... we reach [over.ics.list]p9, which says to use the rules in 
> > [over.ics.ref]. Those rules say that 

[PATCH] D87561: [Sema] List conversion validate character array

2020-09-27 Thread Mark de Wever via Phabricator via cfe-commits
Mordante updated this revision to Diff 294556.
Mordante added a comment.

Addresses the review comments.
Adds an extra test case to test whether the proper overload is called. The 
proper overload is a bit of a surprise so when the expected behaviour changes 
the overload test can be adjusted.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaInit.cpp
  clang/lib/Sema/SemaOverload.cpp
  clang/test/CXX/drs/dr14xx.cpp
  clang/test/SemaCXX/overload-array-size.cpp
  clang/test/SemaObjCXX/overload.mm

Index: clang/test/SemaObjCXX/overload.mm
===
--- clang/test/SemaObjCXX/overload.mm
+++ clang/test/SemaObjCXX/overload.mm
@@ -201,3 +201,17 @@
 }
 
 }
+
+namespace StringLiterals {
+void f(const char(&&)[5]);
+void f(const wchar_t(&&)[5]);
+void f(const char16_t(&&)[5]);
+void f(const char32_t(&&)[5]);
+void g() {
+  f({"abc"});
+  f({(((@encode(int});
+  f({L"abc"});
+  f({uR"(abc)"});
+  f({(UR"(abc)")});
+}
+} // namespace StringLiterals
Index: clang/test/SemaCXX/overload-array-size.cpp
===
--- /dev/null
+++ clang/test/SemaCXX/overload-array-size.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -ast-dump | FileCheck %s
+
+// When the array size is 4 the call will attempt to bind an lvalue to an
+// rvalue and fail. Therfore #2 will be called. (rsmith will bring this issue
+// to CWG)
+void f(const char(&&)[4]); // #1
+void f(const char(&&)[5]); // #2
+void g() {
+  f({"abc"});
+  // CHECK: ExprWithCleanups {{.*}}  'void'
+  // CHECK-NEXT: CallExpr
+  // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(const char (&&)[5])'
+}
Index: clang/test/CXX/drs/dr14xx.cpp
===
--- clang/test/CXX/drs/dr14xx.cpp
+++ clang/test/CXX/drs/dr14xx.cpp
@@ -334,6 +334,22 @@
 
 X x;
 X x2{x};
+
+void f1(int);
+void f1(std::initializer_list);
+void g1() { f1({42}); }
+
+template 
+struct Pair {
+  Pair(T, U);
+};
+struct String {
+  String(const char *);
+};
+
+void f2(Pair);
+void f2(std::initializer_list);
+void g2() { f2({"foo", "bar"}); }
   } // dr_example
 
   namespace nonaggregate {
@@ -379,6 +395,43 @@
 struct Value { Value(Pair); Value(TwoPairs); };
 void f() { Value{{{1,2},{3,4}}}; }
   }
+  namespace NonAmbiguous {
+  // The original implementation made this case ambigious due to the special
+  // handling of one element initialization lists.
+  void f(int(&&)[1]);
+  void f(unsigned(&&)[1]);
+
+  void g(unsigned i) {
+f({i});
+  }
+  } // namespace NonAmbiguous
+
+#if __cplusplus >= 201103L
+  namespace StringLiterals {
+  void f(const char(&&)[4]);
+  void f(const char(&&)[5]);
+  void f(const wchar_t(&&)[4]);
+  void f(const wchar_t(&&)[5]);
+#if __cplusplus >= 202002L
+  void f(const char8_t(&&)[4]);
+  void f(const char8_t(&&)[5]);
+#endif
+  void f(const char16_t(&&)[4]);
+  void f(const char16_t(&&)[5]);
+  void f(const char32_t(&&)[4]);
+  void f(const char32_t(&&)[5]);
+  void g() {
+f({"abc"});
+f({((("abc")))});
+f({L"abc"});
+#if __cplusplus >= 202002L
+f({u8"abc"});
+#endif
+f({uR"(abc)"});
+f({(UR"(abc)")});
+  }
+  } // namespace StringLiterals
+#endif
 } // dr1467
 
 namespace dr1490 {  // dr1490: 3.7 c++11
Index: clang/lib/Sema/SemaOverload.cpp
===
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -4984,20 +4984,20 @@
  InOverloadResolution,
  AllowObjCWritebackConversion);
 }
-// FIXME: Check the other conditions here: array of character type,
-// initializer is a string literal.
-if (ToType->isArrayType()) {
-  InitializedEntity Entity =
-InitializedEntity::InitializeParameter(S.Context, ToType,
-   /*Consumed=*/false);
-  if (S.CanPerformCopyInitialization(Entity, From)) {
-Result.setStandard();
-Result.Standard.setAsIdentityConversion();
-Result.Standard.setFromType(ToType);
-Result.Standard.setAllToTypes(ToType);
-return Result;
+
+if (const auto *AT = S.Context.getAsArrayType(ToType))
+  if (S.IsStringInit(From->getInit(0), AT)) {
+InitializedEntity Entity =
+  InitializedEntity::InitializeParameter(S.Context, ToType,
+ /*Consumed=*/false);
+if (S.CanPerformCopyInitialization(Entity, From)) {
+  Result.setStandard();
+  Result.Standard.setAsIdentityConversion();
+  Result.Standard.setFromType(ToType);
+  Result.Standard.setAllToTypes(ToType);
+  return Result;
+}
   }
-}
   }
 
   // C++14 

[PATCH] D87561: [Sema] List conversion validate character array

2020-09-27 Thread Mark de Wever via Phabricator via cfe-commits
Mordante marked 2 inline comments as done.
Mordante added inline comments.



Comment at: clang/test/CXX/drs/dr14xx.cpp:411-414
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);

rsmith wrote:
> rsmith wrote:
> > Mordante wrote:
> > > rsmith wrote:
> > > > These should presumably be references to arrays, rather than arrays, or 
> > > > the parameter type is as if you wrote (for example) `void f(const char 
> > > > *)`, which shouldn't get the special treatment here.
> > > > 
> > > > [over.ics.list]p4 mentions this in its footnote:
> > > > 
> > > > "Otherwise, if the parameter type is a character array [Footnote: Since 
> > > > there are no parameters of array type, this will only occur as the 
> > > > referenced type of a reference parameter.] and the initializer list has 
> > > > a single element that is an appropriately-typed string-literal (9.4.3), 
> > > > the implicit conversion sequence is the identity conversion."
> > > Ah I missed that footnote. I reread the standard and can you confirm some 
> > > cases?
> > > ```
> > > namespace A { 
> > >   void f(const char(&)[4]);
> > >   void g() { f({"abc"}); }
> > > }
> > > namespace B { 
> > >   void f(const char(&&)[4]);
> > >   void g() { f({"abc"}); }
> > > } 
> > > ```
> > > both should work and with P0388 the array without bounds will also work.
> > > 
> > > I ask since this is my interpretation of the standard, but it seems 
> > > there's a difference between implementations and `void f(const 
> > > char(&&)[4]);` fails for "abc" but works for "ab".
> > > It seems ICC and consider "abc" an lvalue in this case and not when using 
> > > "ab".
> > > 
> > > Here's a gotbolt link with the examples https://godbolt.org/z/r1TKfx
> > That's a really interesting example :)
> > 
> > The initializer is a list containing an lvalue of type `const char[4]`. Per 
> > [dcl.init.list]/3.9 and /3.10, the behavior depends on whether the 
> > referenced type is reference-related to `const char[4]` -- if so, then the 
> > reference can only bind directly and a `&&` reference will be invalid, 
> > because it's binding an rvalue reference to an lvalue, and if not, then we 
> > create an array temporary and the `&&` binding is fine.
> > 
> > Per [dcl.init.ref]/4, `const char[???]` is reference-related to `const 
> > char[4]` if they are similar types, and per [conv.qual]/2, the types are 
> > similar if `???` is omitted or `4`, and not similar otherwise.
> > 
> > So:
> > * `const char (&)[4] = {"abc"}` is ill-formed (types are the same, binds 
> > rvalue reference to lvalue)
> > * `const char (&&)[] = {"abc"}` is ill-formed (types are similar, binds 
> > rvalue reference to lvalue)
> > * `const char (&)[5] = {"abc"}` is OK (types are not similar, creates 
> > temporary)
> > 
> > That seems like a very surprising outcome to me. Perhaps we should probably 
> > ignore array bounds entirely when determining whether two types are 
> > reference-related. I'll take this to CWG for discussion.
> I think that's only an answer to half of your question. The other half is 
> that [over.ics.list]p4 does not apply (directly) to either of your testcases, 
> because the "parameter type" is a reference type, not an array type. For:
> 
> ```
> namespace A { 
>   void f(const char(&)[4]);
>   void g() { f({"abc"}); }
> }
> ```
> 
> ... we reach [over.ics.list]p9, which says to use the rules in 
> [over.ics.ref]. Those rules say that if the reference binds directly to an 
> argument expression (ignore the "expression" here; this is very old wording 
> that predates braced initializers), then we form an identity conversion. So 
> that's what happens in this case.
> 
> For
> 
> ```
> namespace B { 
>   void f(const char(&&)[4]);
>   void g() { f({"abc"}); }
> }
> ```
> 
> the same thing happens, but now [over.ics.ref]p3 says "an implicit conversion 
> sequence cannot be formed if it requires binding an lvalue reference other 
> than a reference to a non-volatile const type to an rvalue or binding an 
> rvalue reference to an lvalue other than a function lvalue", so the candidate 
> is not viable.
> 
> If the array bound were omitted or were not 4, then the reference would not 
> bind directly, and we would instead consider initializing a temporary. 
> [over.ics.ref]p2 says "When a parameter of reference type is not bound 
> directly to an argument expression, the conversion sequence is the one 
> required to convert the argument expression to the referenced type according 
> to 12.4.4.2.", which takes us back around to [over.ics.list] with the 
> "parameter type" now being the array type. That's how we can reach 
> [over.ics.list]p4 and consider string literal -> array initialization.
> 
> So I think that, according to the current rules, for
> 
> ```
> void f(const char (&&)[4]); // #1
> void f(const char (&&)[5]); // #2
> ```
> 
> ... a call to `f({"abc"})` remarkably calls #2, because #1 is not 

[PATCH] D87561: [Sema] List conversion validate character array

2020-09-20 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added inline comments.



Comment at: clang/test/CXX/drs/dr14xx.cpp:411-414
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);

rsmith wrote:
> Mordante wrote:
> > rsmith wrote:
> > > These should presumably be references to arrays, rather than arrays, or 
> > > the parameter type is as if you wrote (for example) `void f(const char 
> > > *)`, which shouldn't get the special treatment here.
> > > 
> > > [over.ics.list]p4 mentions this in its footnote:
> > > 
> > > "Otherwise, if the parameter type is a character array [Footnote: Since 
> > > there are no parameters of array type, this will only occur as the 
> > > referenced type of a reference parameter.] and the initializer list has a 
> > > single element that is an appropriately-typed string-literal (9.4.3), the 
> > > implicit conversion sequence is the identity conversion."
> > Ah I missed that footnote. I reread the standard and can you confirm some 
> > cases?
> > ```
> > namespace A { 
> >   void f(const char(&)[4]);
> >   void g() { f({"abc"}); }
> > }
> > namespace B { 
> >   void f(const char(&&)[4]);
> >   void g() { f({"abc"}); }
> > } 
> > ```
> > both should work and with P0388 the array without bounds will also work.
> > 
> > I ask since this is my interpretation of the standard, but it seems there's 
> > a difference between implementations and `void f(const char(&&)[4]);` fails 
> > for "abc" but works for "ab".
> > It seems ICC and consider "abc" an lvalue in this case and not when using 
> > "ab".
> > 
> > Here's a gotbolt link with the examples https://godbolt.org/z/r1TKfx
> That's a really interesting example :)
> 
> The initializer is a list containing an lvalue of type `const char[4]`. Per 
> [dcl.init.list]/3.9 and /3.10, the behavior depends on whether the referenced 
> type is reference-related to `const char[4]` -- if so, then the reference can 
> only bind directly and a `&&` reference will be invalid, because it's binding 
> an rvalue reference to an lvalue, and if not, then we create an array 
> temporary and the `&&` binding is fine.
> 
> Per [dcl.init.ref]/4, `const char[???]` is reference-related to `const 
> char[4]` if they are similar types, and per [conv.qual]/2, the types are 
> similar if `???` is omitted or `4`, and not similar otherwise.
> 
> So:
> * `const char (&)[4] = {"abc"}` is ill-formed (types are the same, binds 
> rvalue reference to lvalue)
> * `const char (&&)[] = {"abc"}` is ill-formed (types are similar, binds 
> rvalue reference to lvalue)
> * `const char (&)[5] = {"abc"}` is OK (types are not similar, creates 
> temporary)
> 
> That seems like a very surprising outcome to me. Perhaps we should probably 
> ignore array bounds entirely when determining whether two types are 
> reference-related. I'll take this to CWG for discussion.
I think that's only an answer to half of your question. The other half is that 
[over.ics.list]p4 does not apply (directly) to either of your testcases, 
because the "parameter type" is a reference type, not an array type. For:

```
namespace A { 
  void f(const char(&)[4]);
  void g() { f({"abc"}); }
}
```

... we reach [over.ics.list]p9, which says to use the rules in [over.ics.ref]. 
Those rules say that if the reference binds directly to an argument expression 
(ignore the "expression" here; this is very old wording that predates braced 
initializers), then we form an identity conversion. So that's what happens in 
this case.

For

```
namespace B { 
  void f(const char(&&)[4]);
  void g() { f({"abc"}); }
}
```

the same thing happens, but now [over.ics.ref]p3 says "an implicit conversion 
sequence cannot be formed if it requires binding an lvalue reference other than 
a reference to a non-volatile const type to an rvalue or binding an rvalue 
reference to an lvalue other than a function lvalue", so the candidate is not 
viable.

If the array bound were omitted or were not 4, then the reference would not 
bind directly, and we would instead consider initializing a temporary. 
[over.ics.ref]p2 says "When a parameter of reference type is not bound directly 
to an argument expression, the conversion sequence is the one required to 
convert the argument expression to the referenced type according to 12.4.4.2.", 
which takes us back around to [over.ics.list] with the "parameter type" now 
being the array type. That's how we can reach [over.ics.list]p4 and consider 
string literal -> array initialization.

So I think that, according to the current rules, for

```
void f(const char (&&)[4]); // #1
void f(const char (&&)[5]); // #2
```

... a call to `f({"abc"})` remarkably calls #2, because #1 is not viable (would 
bind rvalue reference to lvalue string literal), but #2 is, just like `const 
char (&)[4] = {"abc"};` is ill-formed but `const char (&)[5] = {"abc"};` is 
valid.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  

[PATCH] D87561: [Sema] List conversion validate character array

2020-09-20 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added inline comments.



Comment at: clang/test/CXX/drs/dr14xx.cpp:411-414
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);

Mordante wrote:
> rsmith wrote:
> > These should presumably be references to arrays, rather than arrays, or the 
> > parameter type is as if you wrote (for example) `void f(const char *)`, 
> > which shouldn't get the special treatment here.
> > 
> > [over.ics.list]p4 mentions this in its footnote:
> > 
> > "Otherwise, if the parameter type is a character array [Footnote: Since 
> > there are no parameters of array type, this will only occur as the 
> > referenced type of a reference parameter.] and the initializer list has a 
> > single element that is an appropriately-typed string-literal (9.4.3), the 
> > implicit conversion sequence is the identity conversion."
> Ah I missed that footnote. I reread the standard and can you confirm some 
> cases?
> ```
> namespace A { 
>   void f(const char(&)[4]);
>   void g() { f({"abc"}); }
> }
> namespace B { 
>   void f(const char(&&)[4]);
>   void g() { f({"abc"}); }
> } 
> ```
> both should work and with P0388 the array without bounds will also work.
> 
> I ask since this is my interpretation of the standard, but it seems there's a 
> difference between implementations and `void f(const char(&&)[4]);` fails for 
> "abc" but works for "ab".
> It seems ICC and consider "abc" an lvalue in this case and not when using 
> "ab".
> 
> Here's a gotbolt link with the examples https://godbolt.org/z/r1TKfx
That's a really interesting example :)

The initializer is a list containing an lvalue of type `const char[4]`. Per 
[dcl.init.list]/3.9 and /3.10, the behavior depends on whether the referenced 
type is reference-related to `const char[4]` -- if so, then the reference can 
only bind directly and a `&&` reference will be invalid, because it's binding 
an rvalue reference to an lvalue, and if not, then we create an array temporary 
and the `&&` binding is fine.

Per [dcl.init.ref]/4, `const char[???]` is reference-related to `const char[4]` 
if they are similar types, and per [conv.qual]/2, the types are similar if 
`???` is omitted or `4`, and not similar otherwise.

So:
* `const char (&)[4] = {"abc"}` is ill-formed (types are the same, binds 
rvalue reference to lvalue)
* `const char (&&)[] = {"abc"}` is ill-formed (types are similar, binds rvalue 
reference to lvalue)
* `const char (&)[5] = {"abc"}` is OK (types are not similar, creates 
temporary)

That seems like a very surprising outcome to me. Perhaps we should probably 
ignore array bounds entirely when determining whether two types are 
reference-related. I'll take this to CWG for discussion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87561: [Sema] List conversion validate character array

2020-09-19 Thread Mark de Wever via Phabricator via cfe-commits
Mordante added inline comments.



Comment at: clang/test/CXX/drs/dr14xx.cpp:411-414
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);

rsmith wrote:
> These should presumably be references to arrays, rather than arrays, or the 
> parameter type is as if you wrote (for example) `void f(const char *)`, which 
> shouldn't get the special treatment here.
> 
> [over.ics.list]p4 mentions this in its footnote:
> 
> "Otherwise, if the parameter type is a character array [Footnote: Since there 
> are no parameters of array type, this will only occur as the referenced type 
> of a reference parameter.] and the initializer list has a single element that 
> is an appropriately-typed string-literal (9.4.3), the implicit conversion 
> sequence is the identity conversion."
Ah I missed that footnote. I reread the standard and can you confirm some cases?
```
namespace A { 
  void f(const char(&)[4]);
  void g() { f({"abc"}); }
}
namespace B { 
  void f(const char(&&)[4]);
  void g() { f({"abc"}); }
} 
```
both should work and with P0388 the array without bounds will also work.

I ask since this is my interpretation of the standard, but it seems there's a 
difference between implementations and `void f(const char(&&)[4]);` fails for 
"abc" but works for "ab".
It seems ICC and consider "abc" an lvalue in this case and not when using "ab".

Here's a gotbolt link with the examples https://godbolt.org/z/r1TKfx


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87561: [Sema] List conversion validate character array

2020-09-16 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added inline comments.



Comment at: clang/lib/Sema/SemaOverload.cpp:4989
+if (ToType->isArrayType() && ToType->isCharType() &&
+isa(From->getInit(0))) {
   InitializedEntity Entity =

Mordante wrote:
> rsmith wrote:
> > This is too narrow a check in two ways: we should allow parenthesized 
> > string literals here, and we should allow `ObjCEncodeExpr`.
> Actually it seems the code isn't behaving properly at all. It seems the 
> conversion is done by
> https://github.com/llvm/llvm-project/blob/master/clang/lib/Sema/SemaOverload.cpp#L5174
> resulting in an array-to-pointer conversion instead of an identity conversion.
> 
> It can solve it by manually removing the decay like:
> ```
> if (const auto *DT = dyn_cast(ToType))
>   if (const auto *AT = S.Context.getAsArrayType(DT->getOriginalType()))
> if (S.IsStringInit(From->getInit(0), AT) {
>...
> ```
> This code works and results in an identity conversion. But it feels a bit odd 
> to manually peel away the array-to-pointer decay. Is this the best solution 
> or do you have a better suggestions?
>   
> 
I think this is a bug in your testcases. I'll comment below.



Comment at: clang/test/CXX/drs/dr14xx.cpp:411-414
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);

These should presumably be references to arrays, rather than arrays, or the 
parameter type is as if you wrote (for example) `void f(const char *)`, which 
shouldn't get the special treatment here.

[over.ics.list]p4 mentions this in its footnote:

"Otherwise, if the parameter type is a character array [Footnote: Since there 
are no parameters of array type, this will only occur as the referenced type of 
a reference parameter.] and the initializer list has a single element that is 
an appropriately-typed string-literal (9.4.3), the implicit conversion sequence 
is the identity conversion."


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87561: [Sema] List conversion validate character array

2020-09-15 Thread Mark de Wever via Phabricator via cfe-commits
Mordante marked 2 inline comments as done.
Mordante added inline comments.



Comment at: clang/lib/Sema/SemaOverload.cpp:4989
+if (ToType->isArrayType() && ToType->isCharType() &&
+isa(From->getInit(0))) {
   InitializedEntity Entity =

rsmith wrote:
> This is too narrow a check in two ways: we should allow parenthesized string 
> literals here, and we should allow `ObjCEncodeExpr`.
Actually it seems the code isn't behaving properly at all. It seems the 
conversion is done by
https://github.com/llvm/llvm-project/blob/master/clang/lib/Sema/SemaOverload.cpp#L5174
resulting in an array-to-pointer conversion instead of an identity conversion.

It can solve it by manually removing the decay like:
```
if (const auto *DT = dyn_cast(ToType))
  if (const auto *AT = S.Context.getAsArrayType(DT->getOriginalType()))
if (S.IsStringInit(From->getInit(0), AT) {
   ...
```
This code works and results in an identity conversion. But it feels a bit odd 
to manually peel away the array-to-pointer decay. Is this the best solution or 
do you have a better suggestions?
  



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87561: [Sema] List conversion validate character array

2020-09-14 Thread Mark de Wever via Phabricator via cfe-commits
Mordante marked 3 inline comments as done.
Mordante added a comment.

Thanks for the feedback!




Comment at: clang/lib/Sema/SemaOverload.cpp:4988
+
+if (ToType->isArrayType() && ToType->isCharType() &&
+isa(From->getInit(0))) {

rsmith wrote:
> `isCharType` is too narrow a check here. We also need to check for all the 
> other types that can be initialized from a string literal (`wchar_t`, 
> `char16_t`, ... -- including `unsigned short` in some cases). These details 
> are handled by 
> [`IsStringInit`](https://github.com/llvm/llvm-project/blob/master/clang/lib/Sema/SemaInit.cpp#L136).
>  Maybe it's worth exposing that as a `bool Sema::isStringInit` function or 
> similar to use from here?
I think exposing this function sounds good.



Comment at: clang/lib/Sema/SemaOverload.cpp:4991-4992
   InitializedEntity Entity =
-InitializedEntity::InitializeParameter(S.Context, ToType,
-   /*Consumed=*/false);
+  InitializedEntity::InitializeParameter(S.Context, ToType,
+ /*Consumed=*/false);
   if (S.CanPerformCopyInitialization(Entity, From)) {

rsmith wrote:
> Phabricator thinks you converted spaces to tabs here. Please can you check 
> and convert back if necessary?
That's odd in my editor I see spaces. I ran `git clang-format` on an earlier 
version where I made some changes here. I'll revert the hunk.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87561: [Sema] List conversion validate character array

2020-09-13 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added inline comments.



Comment at: clang/lib/Sema/SemaOverload.cpp:4988
+
+if (ToType->isArrayType() && ToType->isCharType() &&
+isa(From->getInit(0))) {

`isCharType` is too narrow a check here. We also need to check for all the 
other types that can be initialized from a string literal (`wchar_t`, 
`char16_t`, ... -- including `unsigned short` in some cases). These details are 
handled by 
[`IsStringInit`](https://github.com/llvm/llvm-project/blob/master/clang/lib/Sema/SemaInit.cpp#L136).
 Maybe it's worth exposing that as a `bool Sema::isStringInit` function or 
similar to use from here?



Comment at: clang/lib/Sema/SemaOverload.cpp:4989
+if (ToType->isArrayType() && ToType->isCharType() &&
+isa(From->getInit(0))) {
   InitializedEntity Entity =

This is too narrow a check in two ways: we should allow parenthesized string 
literals here, and we should allow `ObjCEncodeExpr`.



Comment at: clang/lib/Sema/SemaOverload.cpp:4991-4992
   InitializedEntity Entity =
-InitializedEntity::InitializeParameter(S.Context, ToType,
-   /*Consumed=*/false);
+  InitializedEntity::InitializeParameter(S.Context, ToType,
+ /*Consumed=*/false);
   if (S.CanPerformCopyInitialization(Entity, From)) {

Phabricator thinks you converted spaces to tabs here. Please can you check and 
convert back if necessary?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87561/new/

https://reviews.llvm.org/D87561

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87561: [Sema] List conversion validate character array

2020-09-12 Thread Mark de Wever via Phabricator via cfe-commits
Mordante created this revision.
Mordante added reviewers: lvoufo, rsmith.
Mordante added a project: clang.
Mordante requested review of this revision.

The function `TryListConversion` didn't properly validate the following part of 
the standard:

  Otherwise, if the parameter type is a character array [... ]
  and the initializer list has a single element that is an
  appropriately-typed string literal (8.5.2 [dcl.init.string]), the
  implicit conversion sequence is the identity conversion.

This caused the following call to `f()` to be ambiguous.

  void f(int(&&)[1]);
  void f(unsigned(&&)[1]);
  
  void g(unsigned i) {
f({i});
  }

This issue only occurrs when the initializer list had one element.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D87561

Files:
  clang/lib/Sema/SemaOverload.cpp
  clang/test/CXX/drs/dr14xx.cpp


Index: clang/test/CXX/drs/dr14xx.cpp
===
--- clang/test/CXX/drs/dr14xx.cpp
+++ clang/test/CXX/drs/dr14xx.cpp
@@ -334,6 +334,22 @@
 
 X x;
 X x2{x};
+
+void f1(int);
+void f1(std::initializer_list);
+void g1() { f1({42}); }
+
+template 
+struct Pair {
+  Pair(T, U);
+};
+struct String {
+  String(const char *);
+};
+
+void f2(Pair);
+void f2(std::initializer_list);
+void g2() { f2({"foo", "bar"}); }
   } // dr_example
 
   namespace nonaggregate {
@@ -379,6 +395,31 @@
 struct Value { Value(Pair); Value(TwoPairs); };
 void f() { Value{{{1,2},{3,4}}}; }
   }
+  namespace NonAmbiguous {
+  // The original implementation made this case ambigious due to the special
+  // handling of one element initialization lists.
+  void f(int(&&)[1]);
+  void f(unsigned(&&)[1]);
+
+  void g(unsigned i) {
+f({i});
+  }
+  } // namespace NonAmbiguous
+
+#if __cplusplus >= 201103L
+  namespace StringLiterals {
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);
+  void g() {
+f({"abc"}); // expected-warning {{braces around scalar initializer}}
+f({L"abc"});// expected-warning {{braces around scalar initializer}}
+f({uR"(abc)"}); // expected-warning {{braces around scalar initializer}}
+f({UR"(abc)"}); // expected-warning {{braces around scalar initializer}}
+  }
+  } // namespace StringLiterals
+#endif
 } // dr1467
 
 namespace dr1490 {  // dr1490: 3.7 c++11
Index: clang/lib/Sema/SemaOverload.cpp
===
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -4984,12 +4984,12 @@
  InOverloadResolution,
  AllowObjCWritebackConversion);
 }
-// FIXME: Check the other conditions here: array of character type,
-// initializer is a string literal.
-if (ToType->isArrayType()) {
+
+if (ToType->isArrayType() && ToType->isCharType() &&
+isa(From->getInit(0))) {
   InitializedEntity Entity =
-InitializedEntity::InitializeParameter(S.Context, ToType,
-   /*Consumed=*/false);
+  InitializedEntity::InitializeParameter(S.Context, ToType,
+ /*Consumed=*/false);
   if (S.CanPerformCopyInitialization(Entity, From)) {
 Result.setStandard();
 Result.Standard.setAsIdentityConversion();


Index: clang/test/CXX/drs/dr14xx.cpp
===
--- clang/test/CXX/drs/dr14xx.cpp
+++ clang/test/CXX/drs/dr14xx.cpp
@@ -334,6 +334,22 @@
 
 X x;
 X x2{x};
+
+void f1(int);
+void f1(std::initializer_list);
+void g1() { f1({42}); }
+
+template 
+struct Pair {
+  Pair(T, U);
+};
+struct String {
+  String(const char *);
+};
+
+void f2(Pair);
+void f2(std::initializer_list);
+void g2() { f2({"foo", "bar"}); }
   } // dr_example
 
   namespace nonaggregate {
@@ -379,6 +395,31 @@
 struct Value { Value(Pair); Value(TwoPairs); };
 void f() { Value{{{1,2},{3,4}}}; }
   }
+  namespace NonAmbiguous {
+  // The original implementation made this case ambigious due to the special
+  // handling of one element initialization lists.
+  void f(int(&&)[1]);
+  void f(unsigned(&&)[1]);
+
+  void g(unsigned i) {
+f({i});
+  }
+  } // namespace NonAmbiguous
+
+#if __cplusplus >= 201103L
+  namespace StringLiterals {
+  void f(const char[4]);
+  void f(const wchar_t[4]);
+  void f(const char16_t[4]);
+  void f(const char32_t[4]);
+  void g() {
+f({"abc"}); // expected-warning {{braces around scalar initializer}}
+f({L"abc"});// expected-warning {{braces around scalar initializer}}
+f({uR"(abc)"}); // expected-warning {{braces around scalar initializer}}
+f({UR"(abc)"}); // expected-warning {{braces around scalar initializer}}
+  }
+  } // namespace StringLiterals