Is it right to only change the behavior of this caller? Presumably other callers (like getSpellingColumnNumber, getExpansionColumnNumber, etc) probably want the same handling? Do any callers /not/ want this behavior?
On Thu, Jun 1, 2017 at 3:14 AM Erik Verbruggen via Phabricator via cfe-commits <cfe-commits@lists.llvm.org> wrote: > erikjv created this revision. > > Previously, the column number in a diagnostic would be the byte position > in the line. This results in incorrect column numbers when a multi-byte > UTF-8 character would be present in the input. By ignoring all bytes > starting with 0b10.... the correct column number is created. > > This fixes PR21144. > > > https://reviews.llvm.org/D33765 > > Files: > include/clang/Basic/SourceManager.h > lib/Basic/SourceManager.cpp > test/Misc/diag-utf8.cpp > > > Index: test/Misc/diag-utf8.cpp > =================================================================== > --- /dev/null > +++ test/Misc/diag-utf8.cpp > @@ -0,0 +1,11 @@ > +// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s > + > +struct Foo { int member; }; > + > +void f(Foo foo) > +{ > + "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: > invalid operands to binary expression ('const char *' and 'Foo') > + "ideƫen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: > invalid operands to binary expression ('const char *' and 'Foo') > +} > + > + > Index: lib/Basic/SourceManager.cpp > =================================================================== > --- lib/Basic/SourceManager.cpp > +++ lib/Basic/SourceManager.cpp > @@ -1055,11 +1055,22 @@ > return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second); > } > > +static unsigned correctForMultiByteChars(const char *Buf, unsigned > LineStart, > + unsigned Column) > +{ > + unsigned CorrectedColumn = Column; > + for (unsigned I = 0; I < Column; ++I) { > + if ((Buf[LineStart + I] & 0xc0) == 0x80) > + --CorrectedColumn; > + } > + return CorrectedColumn; > +} > > /// getColumnNumber - Return the column # for the specified file position. > /// this is significantly cheaper to compute than the line number. > unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos, > - bool *Invalid) const { > + bool *Invalid, > + bool BytePosition) const { > bool MyInvalid = false; > llvm::MemoryBuffer *MemBuf = getBuffer(FID, &MyInvalid); > if (Invalid) > @@ -1093,14 +1104,18 @@ > if (Buf[FilePos - 1] == '\r' || Buf[FilePos - 1] == '\n') > --FilePos; > } > - return FilePos - LineStart + 1; > + unsigned Column = FilePos - LineStart + 1; > + return BytePosition ? Column : correctForMultiByteChars(Buf, > LineStart, > + Column); > } > } > > unsigned LineStart = FilePos; > while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != > '\r') > --LineStart; > - return FilePos-LineStart+1; > + unsigned Column = FilePos - LineStart + 1; > + return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart, > + Column); > } > > // isInvalid - Return the result of calling loc.isInvalid(), and > @@ -1425,7 +1440,8 @@ > unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second, > &Invalid); > if (Invalid) > return PresumedLoc(); > - unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, > &Invalid); > + unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, > &Invalid, > + false); > if (Invalid) > return PresumedLoc(); > > Index: include/clang/Basic/SourceManager.h > =================================================================== > --- include/clang/Basic/SourceManager.h > +++ include/clang/Basic/SourceManager.h > @@ -1275,7 +1275,8 @@ > /// on a file sloc, so you must choose a spelling or expansion location > /// before calling this method. > unsigned getColumnNumber(FileID FID, unsigned FilePos, > - bool *Invalid = nullptr) const; > + bool *Invalid = nullptr, > + bool BytePosition = true) const; > unsigned getSpellingColumnNumber(SourceLocation Loc, > bool *Invalid = nullptr) const; > unsigned getExpansionColumnNumber(SourceLocation Loc, > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits