sammccall updated this revision to Diff 149682.
sammccall added a comment.

clearerr() on (partial) successful read, and recover from EAGAIN during fread.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D47643

Files:
  clangd/ClangdLSPServer.cpp
  clangd/ClangdLSPServer.h
  clangd/JSONRPCDispatcher.cpp
  clangd/JSONRPCDispatcher.h
  clangd/tool/ClangdMain.cpp
  test/clangd/too_large.test

Index: test/clangd/too_large.test
===================================================================
--- test/clangd/too_large.test
+++ test/clangd/too_large.test
@@ -4,4 +4,4 @@
 #
 Content-Length: 2147483648
 
-# STDERR: Skipped overly large message
+# STDERR: Refusing to read message
Index: clangd/tool/ClangdMain.cpp
===================================================================
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -238,5 +238,5 @@
   llvm::set_thread_name("clangd.main");
   // Change stdin to binary to not lose \r\n on windows.
   llvm::sys::ChangeStdinToBinary();
-  return LSPServer.run(std::cin, InputStyle) ? 0 : NoShutdownRequestErrorCode;
+  return LSPServer.run(stdin, InputStyle) ? 0 : NoShutdownRequestErrorCode;
 }
Index: clangd/JSONRPCDispatcher.h
===================================================================
--- clangd/JSONRPCDispatcher.h
+++ clangd/JSONRPCDispatcher.h
@@ -102,7 +102,9 @@
 /// if it is.
 /// Input stream(\p In) must be opened in binary mode to avoid preliminary
 /// replacements of \r\n with \n.
-void runLanguageServerLoop(std::istream &In, JSONOutput &Out,
+/// We use C-style FILE* for reading as std::istream has unclear interaction
+/// with signals, which are sent by debuggers on some OSs.
+void runLanguageServerLoop(std::FILE *In, JSONOutput &Out,
                            JSONStreamStyle InputStyle,
                            JSONRPCDispatcher &Dispatcher, bool &IsDone);
 
Index: clangd/JSONRPCDispatcher.cpp
===================================================================
--- clangd/JSONRPCDispatcher.cpp
+++ clangd/JSONRPCDispatcher.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Chrono.h"
+#include "llvm/Support/Errno.h"
 #include "llvm/Support/SourceMgr.h"
 #include <istream>
 
@@ -66,7 +67,7 @@
     Outs << "Content-Length: " << S.size() << "\r\n\r\n" << S;
     Outs.flush();
   }
-  log(llvm::Twine("--> ") + S);
+  log(llvm::Twine("--> ") + S + "\n");
 }
 
 void JSONOutput::log(const Twine &Message) {
@@ -180,47 +181,55 @@
   return true;
 }
 
-static llvm::Optional<std::string> readStandardMessage(std::istream &In,
+// Tries to read a line up to and including \n.
+// If failing, feof() or ferror() will be set.
+static bool readLine(std::FILE *In, std::string &Out) {
+  static constexpr int BufSize = 1024;
+  size_t Size = 0;
+  Out.clear();
+  for (;;) {
+    Out.resize(Size + BufSize);
+    // Handle EINTR which is sent when a debugger attaches on some platforms.
+    if (!llvm::sys::RetryAfterSignal(nullptr, ::fgets, &Out[Size], BufSize, In))
+      return false;
+    clearerr(In);
+    size_t Read = std::strlen(&Out[Size]);
+    if (Read < BufSize - 1) {
+      Out.resize(Size + Read);
+      return true;
+    }
+    Size += Read;
+  }
+}
+
+// Returns None when:
+//  - ferror() or feof() are set.
+//  - Content-Length is missing or empty (protocol error)
+static llvm::Optional<std::string> readStandardMessage(std::FILE *In,
                                                        JSONOutput &Out) {
   // A Language Server Protocol message starts with a set of HTTP headers,
   // delimited  by \r\n, and terminated by an empty line (\r\n).
   unsigned long long ContentLength = 0;
-  while (In.good()) {
-    std::string Line;
-    std::getline(In, Line);
-    if (!In.good() && errno == EINTR) {
-      In.clear();
-      continue;
-    }
+  std::string Line;
+  while (true) {
+    if (feof(In) || ferror(In) || !readLine(In, Line))
+      return llvm::None;
 
     Out.mirrorInput(Line);
-    // Mirror '\n' that gets consumed by std::getline, but is not included in
-    // the resulting Line.
-    // Note that '\r' is part of Line, so we don't need to mirror it
-    // separately.
-    if (!In.eof())
-      Out.mirrorInput("\n");
-
     llvm::StringRef LineRef(Line);
 
     // We allow comments in headers. Technically this isn't part
     // of the LSP specification, but makes writing tests easier.
     if (LineRef.startswith("#"))
       continue;
 
-    // Content-Type is a specified header, but does nothing.
-    // Content-Length is a mandatory header. It specifies the length of the
-    // following JSON.
-    // It is unspecified what sequence headers must be supplied in, so we
-    // allow any sequence.
-    // The end of headers is signified by an empty line.
+    // Content-Length is a mandatory header, and the only one we handle.
     if (LineRef.consume_front("Content-Length: ")) {
       if (ContentLength != 0) {
         log("Warning: Duplicate Content-Length header received. "
             "The previous value for this message (" +
-            llvm::Twine(ContentLength) + ") was ignored.\n");
+            llvm::Twine(ContentLength) + ") was ignored.");
       }
-
       llvm::getAsUnsignedInteger(LineRef.trim(), 0, ContentLength);
       continue;
     } else if (!LineRef.trim().empty()) {
@@ -233,46 +242,45 @@
     }
   }
 
-  // Guard against large messages. This is usually a bug in the client code
-  // and we don't want to crash downstream because of it.
+  // The fuzzer likes crashing us by sending "Content-Length: 9999999999999999"
   if (ContentLength > 1 << 30) { // 1024M
-    In.ignore(ContentLength);
-    log("Skipped overly large message of " + Twine(ContentLength) +
-        " bytes.\n");
+    log("Refusing to read message with long Content-Length: " +
+        Twine(ContentLength) + ". Expect protocol errors.");
+    return llvm::None;
+  }
+  if (ContentLength == 0) {
+    log("Warning: Missing Content-Length header, or zero-length message.");
     return llvm::None;
   }
 
-  if (ContentLength > 0) {
-    std::string JSON(ContentLength, '\0');
-    In.read(&JSON[0], ContentLength);
-    Out.mirrorInput(StringRef(JSON.data(), In.gcount()));
-
-    // If the stream is aborted before we read ContentLength bytes, In
-    // will have eofbit and failbit set.
-    if (!In) {
-      log("Input was aborted. Read only " + llvm::Twine(In.gcount()) +
-          " bytes of expected " + llvm::Twine(ContentLength) + ".\n");
+  std::string JSON(ContentLength, '\0');
+  for (size_t Pos = 0, Read; Pos < ContentLength; Pos += Read) {
+    // Handle EINTR which is sent when a debugger attaches on some platforms.
+    Read = llvm::sys::RetryAfterSignal(0u, ::fread, &JSON[Pos], 1,
+                                       ContentLength - Pos, In);
+    Out.mirrorInput(StringRef(&JSON[Pos], Read));
+    if (Read == 0) {
+      log("Input was aborted. Read only " + llvm::Twine(Pos) +
+          " bytes of expected " + llvm::Twine(ContentLength) + ".");
       return llvm::None;
     }
-    return std::move(JSON);
-  } else {
-    log("Warning: Missing Content-Length header, or message has zero "
-        "length.\n");
-    return llvm::None;
+    clearerr(In); // If we're done, the error was transient. If we're not done,
+                  // either it was transient or we'll see it again on retry.
+    Pos += Read;
   }
+  return std::move(JSON);
 }
 
 // For lit tests we support a simplified syntax:
 // - messages are delimited by '---' on a line by itself
 // - lines starting with # are ignored.
 // This is a testing path, so favor simplicity over performance here.
-static llvm::Optional<std::string> readDelimitedMessage(std::istream &In,
+// When returning None, feof() or ferror() will be set.
+static llvm::Optional<std::string> readDelimitedMessage(std::FILE *In,
                                                         JSONOutput &Out) {
   std::string JSON;
   std::string Line;
-  while (std::getline(In, Line)) {
-    Line.push_back('\n'); // getline() consumed the newline.
-
+  while (readLine(In, Line)) {
     auto LineRef = llvm::StringRef(Line).trim();
     if (LineRef.startswith("#")) // comment
       continue;
@@ -284,39 +292,47 @@
     JSON += Line;
   }
 
-  if (In.bad()) {
+  if (ferror(In)) {
     log("Input error while reading message!");
     return llvm::None;
-  } else {
+  } else { // Including EOF
     Out.mirrorInput(
         llvm::formatv("Content-Length: {0}\r\n\r\n{1}", JSON.size(), JSON));
     return std::move(JSON);
   }
 }
 
-void clangd::runLanguageServerLoop(std::istream &In, JSONOutput &Out,
+// The use of C-style std::FILE* IO deserves some explanation.
+// Previously, std::istream was used. When a debugger attached on MacOS, the
+// process received EINTR, the stream went bad, and clangd exited.
+// A retry-on-EINTR loop around reads solved this problem, but caused clangd to
+// sometimes hang rather than exit on other OSes. The interaction between
+// istreams and signals isn't well-specified, so it's hard to get this right.
+// The C APIs seem to be clearer in this respect.
+void clangd::runLanguageServerLoop(std::FILE *In, JSONOutput &Out,
                                    JSONStreamStyle InputStyle,
                                    JSONRPCDispatcher &Dispatcher,
                                    bool &IsDone) {
   auto &ReadMessage =
       (InputStyle == Delimited) ? readDelimitedMessage : readStandardMessage;
-  while (In.good()) {
+  while (!IsDone && !feof(In)) {
+    if (ferror(In)) {
+      log("IO error: " + llvm::sys::StrError());
+      return;
+    }
     if (auto JSON = ReadMessage(In, Out)) {
       if (auto Doc = json::parse(*JSON)) {
         // Log the formatted message.
         log(llvm::formatv(Out.Pretty ? "<-- {0:2}\n" : "<-- {0}\n", *Doc));
         // Finally, execute the action for this JSON message.
         if (!Dispatcher.call(*Doc, Out))
-          log("JSON dispatch failed!\n");
+          log("JSON dispatch failed!");
       } else {
         // Parse error. Log the raw message.
         log(llvm::formatv("<-- {0}\n" , *JSON));
         log(llvm::Twine("JSON parse error: ") +
-            llvm::toString(Doc.takeError()) + "\n");
+            llvm::toString(Doc.takeError()));
       }
     }
-    // If we're done, exit the loop.
-    if (IsDone)
-      break;
   }
 }
Index: clangd/ClangdLSPServer.h
===================================================================
--- clangd/ClangdLSPServer.h
+++ clangd/ClangdLSPServer.h
@@ -42,8 +42,8 @@
   /// class constructor. This method must not be executed more than once for
   /// each instance of ClangdLSPServer.
   ///
-  /// \return Wether we received a 'shutdown' request before an 'exit' request
-  bool run(std::istream &In,
+  /// \return Whether we received a 'shutdown' request before an 'exit' request.
+  bool run(std::FILE *In,
            JSONStreamStyle InputStyle = JSONStreamStyle::Standard);
 
 private:
Index: clangd/ClangdLSPServer.cpp
===================================================================
--- clangd/ClangdLSPServer.cpp
+++ clangd/ClangdLSPServer.cpp
@@ -396,7 +396,7 @@
       SupportedSymbolKinds(defaultSymbolKinds()),
       Server(CDB, FSProvider, /*DiagConsumer=*/*this, Opts) {}
 
-bool ClangdLSPServer::run(std::istream &In, JSONStreamStyle InputStyle) {
+bool ClangdLSPServer::run(std::FILE *In, JSONStreamStyle InputStyle) {
   assert(!IsDone && "Run was called before");
 
   // Set up JSONRPCDispatcher.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to