zixuw updated this revision to Diff 408027.
zixuw added a comment.

Fix a test failure on Windows where diff doesn't recognize the "-a" option.

  rG LLVM Github Monorepo




Index: clang/test/SymbolGraph/global_record.c
--- /dev/null
+++ clang/test/SymbolGraph/global_record.c
@@ -0,0 +1,363 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s@INPUT_DIR@%t@g" %t/reference.output.json.in >> \
+// RUN: %t/reference.output.json
+// RUN: %clang -extract-api -target arm64-apple-macosx \
+// RUN: %t/input.c -o %t/output.json | FileCheck -allow-empty %s
+// RUN: diff %t/reference.output.json %t/output.json
+// CHECK-NOT: error:
+// CHECK-NOT: warning:
+//--- input.c
+int num;
+ * \brief Add two numbers.
+ * \param [in]  x   A number.
+ * \param [in]  y   Another number.
+ * \param [out] res The result of x + y.
+ */
+void add(const int x, const int y, int *res);
+//--- reference.output.json.in
+  "metadata": {
+    "formatVersion": {
+      "major": 0,
+      "minor": 5,
+      "patch": 3
+    },
+    "generator": "clang"
+  },
+  "module": {
+    "name": "",
+    "platform": {
+      "architecture": "arm64",
+      "operatingSystem": {
+        "minimumVersion": {
+          "major": 11,
+          "minor": 0,
+          "patch": 0
+        },
+        "name": "macosx"
+      },
+      "vendor": "apple"
+    }
+  },
+  "relationhips": [],
+  "symbols": [
+    {
+      "declaration": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "num"
+        }
+      ],
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@num"
+      },
+      "kind": {
+        "displayName": "Variable",
+        "identifier": "c.variable"
+      },
+      "location": {
+        "character": 5,
+        "line": 1,
+        "uri": "file://INPUT_DIR/input.c"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "num"
+          }
+        ],
+        "title": "num"
+      }
+    },
+    {
+      "declaration": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:v",
+          "spelling": "void"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "add"
+        },
+        {
+          "kind": "text",
+          "spelling": "("
+        },
+        {
+          "kind": "keyword",
+          "spelling": "const"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "x"
+        },
+        {
+          "kind": "text",
+          "spelling": ", "
+        },
+        {
+          "kind": "keyword",
+          "spelling": "const"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "y"
+        },
+        {
+          "kind": "text",
+          "spelling": ", "
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " *"
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "res"
+        },
+        {
+          "kind": "text",
+          "spelling": ")"
+        }
+      ],
+      "docComment": {
+        "lines": [
+          {
+            "range": {
+              "end": {
+                "character": 4,
+                "line": 3
+              },
+              "start": {
+                "character": 4,
+                "line": 3
+              }
+            },
+            "text": ""
+          },
+          {
+            "range": {
+              "end": {
+                "character": 27,
+                "line": 4
+              },
+              "start": {
+                "character": 3,
+                "line": 4
+              }
+            },
+            "text": " \\brief Add two numbers."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 30,
+                "line": 5
+              },
+              "start": {
+                "character": 3,
+                "line": 5
+              }
+            },
+            "text": " \\param [in]  x   A number."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 36,
+                "line": 6
+              },
+              "start": {
+                "character": 3,
+                "line": 6
+              }
+            },
+            "text": " \\param [in]  y   Another number."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 41,
+                "line": 7
+              },
+              "start": {
+                "character": 3,
+                "line": 7
+              }
+            },
+            "text": " \\param [out] res The result of x + y."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 4,
+                "line": 8
+              },
+              "start": {
+                "character": 1,
+                "line": 8
+              }
+            },
+            "text": " "
+          }
+        ]
+      },
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@F@add"
+      },
+      "kind": {
+        "displayName": "Function",
+        "identifier": "c.function"
+      },
+      "location": {
+        "character": 6,
+        "line": 9,
+        "uri": "file://INPUT_DIR/input.c"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "add"
+          }
+        ],
+        "title": "add"
+      },
+      "parameters": {
+        "parameters": [
+          {
+            "declaration": [
+              {
+                "kind": "keyword",
+                "spelling": "const"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:I",
+                "spelling": "int"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "x"
+              }
+            ],
+            "name": "x"
+          },
+          {
+            "declaration": [
+              {
+                "kind": "keyword",
+                "spelling": "const"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:I",
+                "spelling": "int"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "y"
+              }
+            ],
+            "name": "y"
+          },
+          {
+            "declaration": [
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:I",
+                "spelling": "int"
+              },
+              {
+                "kind": "text",
+                "spelling": " *"
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "res"
+              }
+            ],
+            "name": "res"
+          }
+        ],
+        "returns": [
+          {
+            "kind": "typeIdentifier",
+            "preciseIdentifier": "c:v",
+            "spelling": "void"
+          }
+        ]
+      }
+    }
+  ]
Index: clang/test/Driver/extract-api.c
--- clang/test/Driver/extract-api.c
+++ clang/test/Driver/extract-api.c
@@ -8,9 +8,3 @@
 // EXTRACT-API-PHASES: 2: compiler, {1}, api-information
-// FIXME: Check for the dummy output now to verify that the custom action was executed.
-// RUN: %clang -extract-api %s | FileCheck -check-prefix DUMMY-OUTPUT %s
-void dummy_function(void);
-// DUMMY-OUTPUT: dummy_function
Index: clang/lib/SymbolGraph/Serialization.cpp
--- /dev/null
+++ clang/lib/SymbolGraph/Serialization.cpp
@@ -0,0 +1,317 @@
+//===- SymbolGraph/Serialization.cpp ----------------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines the SymbolGraph serializer and parser.
+#include "clang/SymbolGraph/Serialization.h"
+#include "clang/SymbolGraph/API.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace clang;
+using namespace clang::symbolgraph;
+using namespace llvm;
+using namespace llvm::json;
+namespace {
+/// Store \p Obj under \p Key in \p Paren if it holds a value. An empty
+/// Optional leaves \p Paren unchanged, so absent sub-objects produce no key.
+static void serializeObject(Object &Paren, StringRef Key,
+                            Optional<Object> Obj) {
+  if (Obj)
+    Paren[Key] = std::move(Obj.getValue());
+/// Store \p Array under \p Key in \p Paren if it holds a value. An empty
+/// Optional leaves \p Paren unchanged.
+static void serializeArray(Object &Paren, StringRef Key,
+                           Optional<Array> Array) {
+  if (Array)
+    Paren[Key] = std::move(Array.getValue());
+// SymbolGraph: SemanticVersion
+/// Serialize \p V as an object with "major", "minor" and "patch" keys.
+///
+/// \returns None when \p V is empty. A missing minor or subminor component
+/// is written as 0.
+static Optional<Object> serializeSemanticVersion(const VersionTuple &V) {
+  if (V.empty())
+    return None;
+  Object Version;
+  Version["major"] = V.getMajor();
+  Version["minor"] = V.getMinor().getValueOr(0);
+  Version["patch"] = V.getSubminor().getValueOr(0);
+  return Version;
+/// Serialize the operating system of the target triple \p T: its OS type name
+/// under "name" and, when the version tuple is non-empty, the minimum
+/// supported OS version under "minimumVersion".
+static Object serializeOperatingSystem(const Triple &T) {
+  Object OS;
+  OS["name"] = T.getOSTypeName(T.getOS());
+  serializeObject(OS, "minimumVersion",
+                  serializeSemanticVersion(T.getMinimumSupportedOSVersion()));
+  return OS;
+// SymbolGraph: Platform
+/// Serialize the target triple \p T as a platform object with the keys
+/// "architecture", "vendor" and "operatingSystem".
+static Object serializePlatform(const Triple &T) {
+  Object Platform;
+  Platform["architecture"] = T.getArchName();
+  Platform["vendor"] = T.getVendorName();
+  Platform["operatingSystem"] = serializeOperatingSystem(T);
+  return Platform;
+// SymbolGraph: SourcePosition
+/// Serialize \p Loc as an object with "line" and "character" (column) keys.
+///
+/// \param Loc            The presumed location; asserted to be valid.
+/// \param IncludeFileURI When true, also emit a "uri" key holding "file://"
+///                       followed by the presumed filename.
+static Object serializeSourcePosition(const PresumedLoc &Loc,
+                                      bool IncludeFileURI = false) {
+  assert(Loc.isValid() && "invalid source position");
+  Object SourcePosition;
+  SourcePosition["line"] = Loc.getLine();
+  SourcePosition["character"] = Loc.getColumn();
+  if (IncludeFileURI) {
+    // NOTE(review): the filename is concatenated verbatim -- no
+    // percent-encoding or path-separator normalization happens here. Confirm
+    // this is acceptable for paths with spaces or on Windows hosts.
+    std::string FileURI = "file://";
+    FileURI += Loc.getFilename();
+    SourcePosition["uri"] = FileURI;
+  }
+  return SourcePosition;
+// SymbolGraph: SourceRange
+/// Serialize a source range as an object whose "start" and "end" keys hold
+/// the positions of \p BeginLoc and \p EndLoc (without a file URI).
+static Object serializeSourceRange(const PresumedLoc &BeginLoc,
+                                   const PresumedLoc &EndLoc) {
+  Object SourceRange;
+  serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc));
+  serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc));
+  return SourceRange;
+// SymbolGraph: AvailabilityItem
+/// Serialize the availability information \p Avail.
+///
+/// \returns None when \p Avail is the default availability. Otherwise an
+/// object holding whichever of "introducedVersion", "deprecatedVersion" and
+/// "obsoletedVersion" are non-empty, plus the boolean flags
+/// "isUnconditionallyUnavailable" / "isUnconditionallyDeprecated" when set.
+static Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) {
+  if (Avail.isDefault())
+    return None;
+  Object Availbility;
+  serializeObject(Availbility, "introducedVersion",
+                  serializeSemanticVersion(Avail.Introduced));
+  serializeObject(Availbility, "deprecatedVersion",
+                  serializeSemanticVersion(Avail.Deprecated));
+  serializeObject(Availbility, "obsoletedVersion",
+                  serializeSemanticVersion(Avail.Obsoleted));
+  // The flags are only written when true; they are never emitted as false.
+  if (Avail.isUnavailable())
+    Availbility["isUnconditionallyUnavailable"] = true;
+  if (Avail.isUnconditionallyDeprecated())
+    Availbility["isUnconditionallyDeprecated"] = true;
+  return Availbility;
+/// Map the language of \p LangOpts' language standard to the name used in
+/// the symbol graph: "c" for C and "objc" for Objective-C. Every other
+/// language kind hits llvm_unreachable.
+static StringRef getLanguageName(const LangOptions &LangOpts) {
+  auto Language =
+      LangStandard::getLangStandardForKind(LangOpts.LangStd).getLanguage();
+  switch (Language) {
+  case Language::C:
+    return "c";
+  case Language::ObjC:
+    return "objc";
+  // Languages that are not supported yet
+  case Language::CXX:
+  case Language::ObjCXX:
+  case Language::OpenCL:
+  case Language::OpenCLCXX:
+  case Language::CUDA:
+  case Language::RenderScript:
+  case Language::HIP:
+  // Languages that the frontend cannot parse and compile
+  case Language::Unknown:
+  case Language::Asm:
+  case Language::LLVM_IR:
+    llvm_unreachable("Unsupported language kind");
+  }
+  llvm_unreachable("Unhandled language kind");
+// SymbolGraph: Symbol::identifier
+/// Serialize the identifier of \p Record: its USR under "precise" and the
+/// language name derived from \p LangOpts under "interfaceLanguage".
+static Object serializeIdentifier(const APIRecord &Record,
+                                  const LangOptions &LangOpts) {
+  Object Identifier;
+  Identifier["precise"] = Record.USR;
+  Identifier["interfaceLanguage"] = getLanguageName(LangOpts);
+  return Identifier;
+// SymbolGraph: DocComment
+/// Serialize the documentation comment \p Comment.
+///
+/// \returns None when \p Comment is empty. Otherwise an object whose "lines"
+/// array holds one entry per comment line, each with its "text" and the
+/// "range" spanned by the line's Begin and End locations.
+static Optional<Object> serializeDocComment(const DocComment &Comment) {
+  if (Comment.empty())
+    return None;
+  Object DocComment;
+  Array LinesArray;
+  for (const auto &CommentLine : Comment) {
+    Object Line;
+    Line["text"] = CommentLine.Text;
+    serializeObject(Line, "range",
+                    serializeSourceRange(CommentLine.Begin, CommentLine.End));
+    LinesArray.emplace_back(std::move(Line));
+  }
+  // LinesArray is not used again: hand it over instead of deep-copying every
+  // serialized line into the Optional<Array> parameter.
+  serializeArray(DocComment, "lines", std::move(LinesArray));
+  return DocComment;
+/// Serialize the declaration fragments \p DF as an array of objects.
+///
+/// Each fragment yields its "spelling" and "kind" (as the kind's string
+/// form); "preciseIdentifier" is only written when the fragment has one.
+///
+/// \returns None when \p DF has no fragments.
+static Optional<Array>
+serializeDeclarationFragments(const DeclarationFragments &DF) {
+  if (DF.getFragments().empty())
+    return None;
+  Array Fragments;
+  for (const auto &F : DF.getFragments()) {
+    Object Fragment;
+    Fragment["spelling"] = F.Spelling;
+    Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind);
+    if (!F.PreciseIdentifier.empty())
+      Fragment["preciseIdentifier"] = F.PreciseIdentifier;
+    Fragments.emplace_back(std::move(Fragment));
+  }
+  return Fragments;
+/// Serialize the function signature \p FS.
+///
+/// The return type's fragments go under "returns". Each parameter becomes an
+/// object with its "name" and "declaration" fragments, collected under
+/// "parameters"; that key is omitted when there are no parameters.
+///
+/// \returns None when \p FS is empty.
+static Optional<Object>
+serializeFunctionSignature(const FunctionSignature &FS) {
+  if (FS.empty())
+    return None;
+  Object Signature;
+  serializeArray(Signature, "returns",
+                 serializeDeclarationFragments(FS.getReturnType()));
+  Array Parameters;
+  for (const auto &P : FS.getParameters()) {
+    Object Parameter;
+    Parameter["name"] = P.Name;
+    serializeArray(Parameter, "declaration",
+                   serializeDeclarationFragments(P.Fragments));
+    Parameters.emplace_back(std::move(Parameter));
+  }
+  if (!Parameters.empty())
+    Signature["parameters"] = std::move(Parameters);
+  return Signature;
+/// Serialize the display names of \p Record: its name under "title" and,
+/// when non-empty, its sub-heading fragments under "subHeading".
+static Object serializeNames(const APIRecord &Record) {
+  Object Names;
+  Names["title"] = Record.Name;
+  serializeArray(Names, "subHeading",
+                 serializeDeclarationFragments(Record.SubHeading));
+  return Names;
+// SymbolGraph: Symbol::kind
+/// Serialize the kind of \p Record.
+///
+/// For global records, "identifier" is the language name from \p LangOpts
+/// followed by ".function" or ".variable", and "displayName" is "Function"
+/// or "Variable". A global of unknown kind yields an empty object.
+static Object serializeSymbolKind(const APIRecord &Record,
+                                  const LangOptions &LangOpts) {
+  Object Kind;
+  switch (Record.getKind()) {
+  case APIRecord::RK_Global: {
+    // The record kind was just checked, so use cast<> (which asserts on a
+    // mismatch) rather than dereferencing an unchecked dyn_cast<> result.
+    const auto *GR = cast<GlobalRecord>(&Record);
+    switch (GR->GlobalKind) {
+    case GVKind::Function:
+      Kind["identifier"] = (getLanguageName(LangOpts) + ".function").str();
+      Kind["displayName"] = "Function";
+      break;
+    case GVKind::Variable:
+      Kind["identifier"] = (getLanguageName(LangOpts) + ".variable").str();
+      Kind["displayName"] = "Variable";
+      break;
+    case GVKind::Unknown:
+      // Unknown global kind
+      break;
+    }
+    break;
+  }
+  }
+  return Kind;
+} // namespace
+const VersionTuple Serializer::FormatVersion{0, 5, 3};
+/// Build the "metadata" section: the symbol graph format version under
+/// "formatVersion" and the generator name "clang" under "generator".
+Object Serializer::serializeMetadata() const {
+  Object Metadata;
+  serializeObject(Metadata, "formatVersion",
+                  serializeSemanticVersion(FormatVersion));
+  Metadata["generator"] = "clang";
+  return Metadata;
+/// Build the "module" section: an (currently empty) module "name" and the
+/// "platform" derived from the API's target triple.
+Object Serializer::serializeModule() const {
+  Object Module;
+  // FIXME: What to put in here?
+  Module["name"] = "";
+  serializeObject(Module, "platform", serializePlatform(API.getTarget()));
+  return Module;
+/// Serialize the fields shared by every API record into a symbol object:
+/// "identifier", "kind", "names", "location" (including the file URI),
+/// "availability", "docComment" and "declaration". Availability, doc comment
+/// and declaration are omitted when their serializers return None.
+Object Serializer::serializeAPIRecord(const APIRecord &Record) const {
+  Object Obj;
+  serializeObject(Obj, "identifier",
+                  serializeIdentifier(Record, API.getLangOpts()));
+  serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLangOpts()));
+  serializeObject(Obj, "names", serializeNames(Record));
+  serializeObject(
+      Obj, "location",
+      serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true));
+  // The key was previously misspelled "availbility", which symbol graph
+  // consumers would not recognize.
+  serializeObject(Obj, "availability",
+                  serializeAvailability(Record.Availability));
+  serializeObject(Obj, "docComment", serializeDocComment(Record.Comment));
+  serializeArray(Obj, "declaration",
+                 serializeDeclarationFragments(Record.Declaration));
+  return Obj;
+/// Serialize the global record \p Record and append it to Symbols. Function
+/// records additionally get their signature under "parameters" (omitted when
+/// the signature is empty).
+void Serializer::serializeGlobalRecord(const GlobalRecord &Record) {
+  Object Obj = serializeAPIRecord(Record);
+  if (Record.GlobalKind == GVKind::Function)
+    serializeObject(Obj, "parameters",
+                    serializeFunctionSignature(Record.Signature));
+  Symbols.emplace_back(std::move(Obj));
+/// Build the root symbol graph object: "metadata", "module", one entry in
+/// "symbols" per global record of the API, and the relationships array.
+///
+/// Symbols and Relationships are moved into the result.
+Object Serializer::serialize() {
+  Object Root;
+  serializeObject(Root, "metadata", serializeMetadata());
+  serializeObject(Root, "module", serializeModule());
+  for (const auto &Global : API.getGlobals())
+    serializeGlobalRecord(*Global.second);
+  Root["symbols"] = std::move(Symbols);
+  // NOTE(review): the key below is spelled "relationhips" (missing 's'). The
+  // reference output in clang/test/SymbolGraph/global_record.c expects the
+  // same spelling, so the two have to be corrected together.
+  Root["relationhips"] = std::move(Relationships);
+  return Root;
+/// Serialize the symbol graph and write it to \p os followed by a newline.
+/// Options.Compact selects the "{0}" format; otherwise "{0:2}" is used
+/// (the reference test output is indented by two spaces).
+void Serializer::serialize(raw_ostream &os) {
+  Object root = serialize();
+  if (Options.Compact)
+    os << formatv("{0}", Value(std::move(root))) << "\n";
+  else
+    os << formatv("{0:2}", Value(std::move(root))) << "\n";
Index: clang/lib/SymbolGraph/DeclarationFragments.cpp
--- /dev/null
+++ clang/lib/SymbolGraph/DeclarationFragments.cpp
@@ -0,0 +1,434 @@
+//===- SymbolGraph/DeclarationFragments.cpp ---------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines SymbolGraph Declaration Fragments related classes.
+#include "clang/SymbolGraph/DeclarationFragments.h"
+#include "clang/Index/USRGeneration.h"
+#include "llvm/ADT/StringSwitch.h"
+namespace clang {
+namespace symbolgraph {
+/// Append a single space after the current fragments, if there are any.
+///
+/// When the last fragment is not text, a new " " text fragment is appended.
+/// When it is text, the intent is to merge the space into it unless it
+/// already ends with one.
+///
+/// \returns *this, to allow chaining.
+DeclarationFragments &DeclarationFragments::appendSpace() {
+  if (!Fragments.empty()) {
+    // NOTE(review): `Last` is a by-value copy of the last fragment, so the
+    // push_back below modifies the copy and the stored fragment never gains
+    // the space. Binding by reference (`Fragment &Last`) would fix this, but
+    // the reference output in clang/test/SymbolGraph/global_record.c (the
+    // " *" text fragments) was produced with the current behavior and would
+    // have to be updated to " * " in the same change.
+    Fragment Last = Fragments.back();
+    if (Last.Kind == FragmentKind::Text) {
+      // NOTE(review): back() on an empty Spelling would be invalid -- confirm
+      // text fragments are never empty.
+      if (Last.Spelling.back() != ' ') {
+        Last.Spelling.push_back(' ');
+      }
+    } else {
+      append(" ", FragmentKind::Text);
+    }
+  }
+  return *this;
+/// Return the string used in the serialized symbol graph for the fragment
+/// kind \p Kind (for example "typeIdentifier" or "internalParam").
+/// FragmentKind::None maps to "none".
+StringRef DeclarationFragments::getFragmentKindString(
+    DeclarationFragments::FragmentKind Kind) {
+  switch (Kind) {
+  case DeclarationFragments::FragmentKind::None:
+    return "none";
+  case DeclarationFragments::FragmentKind::Keyword:
+    return "keyword";
+  case DeclarationFragments::FragmentKind::Attribute:
+    return "attribute";
+  case DeclarationFragments::FragmentKind::NumberLiteral:
+    return "number";
+  case DeclarationFragments::FragmentKind::StringLiteral:
+    return "string";
+  case DeclarationFragments::FragmentKind::Identifier:
+    return "identifier";
+  case DeclarationFragments::FragmentKind::TypeIdentifier:
+    return "typeIdentifier";
+  case DeclarationFragments::FragmentKind::GenericParameter:
+    return "genericParam";
+  case DeclarationFragments::FragmentKind::ExternalParam:
+    return "externalParam";
+  case DeclarationFragments::FragmentKind::InternalParam:
+    return "internalParam";
+  case DeclarationFragments::FragmentKind::Text:
+    return "text";
+  }
+  llvm_unreachable("Unhandled FragmentKind");
+DeclarationFragments::parseFragmentKindFromString(StringRef S) {
+  return llvm::StringSwitch<FragmentKind>(S)
+      .Case("keyword", DeclarationFragments::FragmentKind::Keyword)
+      .Case("attribute", DeclarationFragments::FragmentKind::Attribute)
+      .Case("number", DeclarationFragments::FragmentKind::NumberLiteral)
+      .Case("string", DeclarationFragments::FragmentKind::StringLiteral)
+      .Case("identifier", DeclarationFragments::FragmentKind::Identifier)
+      .Case("typeIdentifier",
+            DeclarationFragments::FragmentKind::TypeIdentifier)
+      .Case("genericParam",
+            DeclarationFragments::FragmentKind::GenericParameter)
+      .Case("internalParam", DeclarationFragments::FragmentKind::InternalParam)
+      .Case("externalParam", DeclarationFragments::FragmentKind::ExternalParam)
+      .Case("text", DeclarationFragments::FragmentKind::Text)
+      .Default(DeclarationFragments::FragmentKind::None);
+// NNS stores C++ nested name specifiers, which are prefixes to qualified names.
+// Build declaration fragments for NNS recursively so that we have the USR for
+// every part in a qualified name, and also leaves the actual underlying type
+// cleaner for its own fragment.
+DeclarationFragmentsBuilder::getFragmentsForNNS(const NestedNameSpecifier *NNS,
+                                                ASTContext &Context,
+                                                DeclarationFragments &After) {
+  DeclarationFragments Fragments;
+  if (NNS->getPrefix())
+    Fragments.append(getFragmentsForNNS(NNS->getPrefix(), Context, After));
+  switch (NNS->getKind()) {
+  case NestedNameSpecifier::Identifier:
+    Fragments.append(NNS->getAsIdentifier()->getName(),
+                     DeclarationFragments::FragmentKind::Identifier);
+    break;
+  case NestedNameSpecifier::Namespace: {
+    const NamespaceDecl *NS = NNS->getAsNamespace();
+    if (NS->isAnonymousNamespace())
+      return Fragments;
+    SmallString<128> USR;
+    index::generateUSRForDecl(NS, USR);
+    Fragments.append(NS->getName(),
+                     DeclarationFragments::FragmentKind::Identifier, USR);
+    break;
+  }
+  case NestedNameSpecifier::NamespaceAlias: {
+    const NamespaceAliasDecl *Alias = NNS->getAsNamespaceAlias();
+    SmallString<128> USR;
+    index::generateUSRForDecl(Alias, USR);
+    Fragments.append(Alias->getName(),
+                     DeclarationFragments::FragmentKind::Identifier, USR);
+    break;
+  }
+  case NestedNameSpecifier::Global:
+    // The global specifier `::` at the beginning. No stored value.
+    break;
+  case NestedNameSpecifier::Super:
+    // Microsoft's `__super` specifier.
+    Fragments.append("__super", DeclarationFragments::FragmentKind::Keyword);
+    break;
+  case NestedNameSpecifier::TypeSpecWithTemplate:
+    // A type prefixed by the `template` keyword.
+    Fragments.append("template", DeclarationFragments::FragmentKind::Keyword);
+    Fragments.appendSpace();
+    // Fallthrough after adding the keyword to handle the actual type.
+  case NestedNameSpecifier::TypeSpec: {
+    const Type *T = NNS->getAsType();
+    // FIXME: Handle C++ template specialization type
+    Fragments.append(getFragmentsForType(T, Context, After));
+    break;
+  }
+  }
+  // Add the separator text `::` for this segment.
+  return Fragments.append("::", DeclarationFragments::FragmentKind::Text);
+// Recursively build the declaration fragments for an underlying `Type` with
+// qualifiers removed.
+//
+// `After` collects fragments that must follow the declared name (array
+// brackets); the returned fragments are the part that precedes the name.
+DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType(
+    const Type *T, ASTContext &Context, DeclarationFragments &After) {
+  assert(T && "invalid type");
+  DeclarationFragments Fragments;
+  // The declaration fragments of a pointer type are the declaration fragments
+  // of the pointee type followed by a `*`, except for Objective-C `id` and
+  // `Class` pointers, where we do not spell out the `*`.
+  if (T->isPointerType() ||
+      (T->isObjCObjectPointerType() &&
+       !T->getAs<ObjCObjectPointerType>()->isObjCIdOrClassType())) {
+    return Fragments
+        .append(getFragmentsForType(T->getPointeeType(), Context, After))
+        .append(" *", DeclarationFragments::FragmentKind::Text);
+  }
+  // The declaration fragments of an lvalue reference type are the declaration
+  // fragments of the underlying type followed by a `&`.
+  if (const LValueReferenceType *LRT = dyn_cast<LValueReferenceType>(T))
+    return Fragments
+        .append(
+            getFragmentsForType(LRT->getPointeeTypeAsWritten(), Context, After))
+        .append(" &", DeclarationFragments::FragmentKind::Text);
+  // The declaration fragments of an rvalue reference type are the declaration
+  // fragments of the underlying type followed by a `&&`.
+  if (const RValueReferenceType *RRT = dyn_cast<RValueReferenceType>(T))
+    return Fragments
+        .append(
+            getFragmentsForType(RRT->getPointeeTypeAsWritten(), Context, After))
+        .append(" &&", DeclarationFragments::FragmentKind::Text);
+  // Declaration fragments of an array-typed variable have two parts:
+  // 1. the element type of the array that appears before the variable name;
+  // 2. array brackets `[(0-9)?]` that appear after the variable name.
+  if (const ArrayType *AT = T->getAsArrayTypeUnsafe()) {
+    // Build the "after" part first because the inner element type might also
+    // be an array-type. For example `int matrix[3][4]` which has a type of
+    // "(array 3 of (array 4 of ints))."
+    // Push the array size part first to make sure they are in the right order.
+    After.append("[", DeclarationFragments::FragmentKind::Text);
+    // NOTE(review): the `static` and `*` size modifiers are appended to
+    // `Fragments` (emitted before the name) while the surrounding brackets go
+    // to `After`. Confirm this placement is intended; in C source these
+    // modifiers are written inside the brackets.
+    switch (AT->getSizeModifier()) {
+    case ArrayType::Normal:
+      break;
+    case ArrayType::Static:
+      Fragments.append("static", DeclarationFragments::FragmentKind::Keyword);
+      break;
+    case ArrayType::Star:
+      Fragments.append("*", DeclarationFragments::FragmentKind::Text);
+      break;
+    }
+    if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) {
+      // FIXME: right now this would evaluate any expressions/macros written in
+      // the original source to concrete values. For example
+      // `int nums[MAX]` -> `int nums[100]`
+      // `char *str[5 + 1]` -> `char *str[6]`
+      SmallString<128> Size;
+      CAT->getSize().toStringUnsigned(Size);
+      After.append(Size, DeclarationFragments::FragmentKind::NumberLiteral);
+    }
+    After.append("]", DeclarationFragments::FragmentKind::Text);
+    return Fragments.append(
+        getFragmentsForType(AT->getElementType(), Context, After));
+  }
+  // An ElaboratedType is a sugar for types that are referred to using an
+  // elaborated keyword, e.g., `struct S`, `enum E`, or (in C++) via a
+  // qualified name, e.g., `N::M::type`, or both.
+  if (const ElaboratedType *ET = dyn_cast<ElaboratedType>(T)) {
+    ElaboratedTypeKeyword Keyword = ET->getKeyword();
+    if (Keyword != ETK_None) {
+      Fragments
+          .append(ElaboratedType::getKeywordName(Keyword),
+                  DeclarationFragments::FragmentKind::Keyword)
+          .appendSpace();
+    }
+    if (const NestedNameSpecifier *NNS = ET->getQualifier())
+      Fragments.append(getFragmentsForNNS(NNS, Context, After));
+    // After handling the elaborated keyword or qualified name, build
+    // declaration fragments for the desugared underlying type.
+    return Fragments.append(getFragmentsForType(ET->desugar(), Context, After));
+  }
+  // Everything we care about has been handled now, reduce to the canonical
+  // unqualified base type.
+  QualType Base = T->getCanonicalTypeUnqualified();
+  // Default fragment builder for other kinds of types (BuiltinType etc.)
+  SmallString<128> USR;
+  clang::index::generateUSRForType(Base, Context, USR);
+  Fragments.append(Base.getAsString(),
+                   DeclarationFragments::FragmentKind::TypeIdentifier, USR);
+  return Fragments;
+DeclarationFragmentsBuilder::getFragmentsForQualifiers(const Qualifiers Quals) {
+  DeclarationFragments Fragments;
+  if (Quals.hasConst())
+    Fragments.append("const", DeclarationFragments::FragmentKind::Keyword);
+  if (Quals.hasVolatile())
+    Fragments.append("volatile", DeclarationFragments::FragmentKind::Keyword);
+  if (Quals.hasRestrict())
+    Fragments.append("restrict", DeclarationFragments::FragmentKind::Keyword);
+  return Fragments;
+// Build the declaration fragments for a qualified type.
+//
+// A parenthesized type appends `)` to `After` and returns the inner type's
+// fragments followed by `(`. Otherwise the type is split into qualifiers and
+// the unqualified type; when qualifiers are present they are placed after the
+// type for pointer types and before it for everything else.
+DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType(
+    const QualType QT, ASTContext &Context, DeclarationFragments &After) {
+  assert(!QT.isNull() && "invalid type");
+  if (const ParenType *PT = dyn_cast<ParenType>(QT)) {
+    After.append(")", DeclarationFragments::FragmentKind::Text);
+    return getFragmentsForType(PT->getInnerType(), Context, After)
+        .append("(", DeclarationFragments::FragmentKind::Text);
+  }
+  const SplitQualType SQT = QT.split();
+  DeclarationFragments QualsFragments = getFragmentsForQualifiers(SQT.Quals),
+                       TypeFragments =
+                           getFragmentsForType(SQT.Ty, Context, After);
+  // No qualifiers: the type fragments are the whole result.
+  if (QualsFragments.getFragments().empty())
+    return TypeFragments;
+  // Use east qualifier for pointer types
+  // For example:
+  // ```
+  // int *   const
+  // ^----   ^----
+  //  type    qualifier
+  // ^-----------------
+  //  const pointer to int
+  // ```
+  // should not be reconstructed as
+  // ```
+  // const       int       *
+  // ^----       ^--
+  //  qualifier   type
+  // ^----------------     ^
+  //  pointer to const int
+  // ```
+  if (SQT.Ty->isAnyPointerType())
+    return TypeFragments.appendSpace().append(std::move(QualsFragments));
+  return QualsFragments.appendSpace().append(std::move(TypeFragments));
+DeclarationFragmentsBuilder::getFragmentsForVar(const VarDecl *Var) {
+  DeclarationFragments Fragments;
+  StorageClass SC = Var->getStorageClass();
+  if (SC != SC_None)
+    Fragments
+        .append(VarDecl::getStorageClassSpecifierString(SC),
+                DeclarationFragments::FragmentKind::Keyword)
+        .appendSpace();
+  QualType T =
+      Var->getTypeSourceInfo()
+          ? Var->getTypeSourceInfo()->getType()
+          : Var->getASTContext().getUnqualifiedObjCPointerType(Var->getType());
+  // Capture potential fragments that need to be placed after the variable name
+  // ```
+  // int nums[5];
+  // char (*ptr_to_array)[6];
+  // ```
+  DeclarationFragments After;
+  return Fragments.append(getFragmentsForType(T, Var->getASTContext(), After))
+      .appendSpace()
+      .append(Var->getName(), DeclarationFragments::FragmentKind::Identifier)
+      .append(std::move(After));
+DeclarationFragmentsBuilder::getFragmentsForParam(const ParmVarDecl *Param) {
+  DeclarationFragments Fragments, After;
+  QualType T = Param->getTypeSourceInfo()
+                   ? Param->getTypeSourceInfo()->getType()
+                   : Param->getASTContext().getUnqualifiedObjCPointerType(
+                         Param->getType());
+  DeclarationFragments TypeFragments =
+      getFragmentsForType(T, Param->getASTContext(), After);
+  if (Param->isObjCMethodParameter())
+    Fragments.append("(", DeclarationFragments::FragmentKind::Text)
+        .append(std::move(TypeFragments))
+        .append(")", DeclarationFragments::FragmentKind::Text);
+  else
+    Fragments.append(std::move(TypeFragments)).appendSpace();
+  return Fragments
+      .append(Param->getName(),
+              DeclarationFragments::FragmentKind::InternalParam)
+      .append(std::move(After));
+DeclarationFragmentsBuilder::getFragmentsForFunction(const FunctionDecl *Func) {
+  DeclarationFragments Fragments;
+  // FIXME: Handle template specialization
+  switch (Func->getStorageClass()) {
+  case SC_None:
+  case SC_PrivateExtern:
+    break;
+  case SC_Extern:
+    Fragments.append("extern", DeclarationFragments::FragmentKind::Keyword)
+        .appendSpace();
+    break;
+  case SC_Static:
+    Fragments.append("static", DeclarationFragments::FragmentKind::Keyword)
+        .appendSpace();
+    break;
+  case SC_Auto:
+  case SC_Register:
+    llvm_unreachable("invalid for functions");
+  }
+  // FIXME: Handle C++ function specifiers: constexpr, consteval, explicit, etc.
+  // FIXME: Is `after` actually needed here?
+  DeclarationFragments After;
+  Fragments
+      .append(getFragmentsForType(Func->getReturnType(), Func->getASTContext(),
+                                  After))
+      .appendSpace()
+      .append(Func->getName(), DeclarationFragments::FragmentKind::Identifier)
+      .append(std::move(After));
+  Fragments.append("(", DeclarationFragments::FragmentKind::Text);
+  for (unsigned i = 0, end = Func->getNumParams(); i != end; ++i) {
+    if (i)
+      Fragments.append(", ", DeclarationFragments::FragmentKind::Text);
+    Fragments.append(getFragmentsForParam(Func->getParamDecl(i)));
+  }
+  Fragments.append(")", DeclarationFragments::FragmentKind::Text);
+  // FIXME: Handle exception specifiers: throw, noexcept
+  return Fragments;
+DeclarationFragmentsBuilder::getFunctionSignature(const FunctionDecl *Func) {
+  FunctionSignature Signature;
+  for (const auto *Param : Func->parameters()) {
+    StringRef Name = Param->getName();
+    DeclarationFragments Fragments = getFragmentsForParam(Param);
+    Signature.addParameter(Name, Fragments);
+  }
+  DeclarationFragments After;
+  DeclarationFragments Returns =
+      getFragmentsForType(Func->getReturnType(), Func->getASTContext(), After)
+          .append(std::move(After));
+  Signature.setReturnType(Returns);
+  return Signature;
+// Subheading of a symbol defaults to its name.
+DeclarationFragmentsBuilder::getSubHeading(const NamedDecl *Decl) {
+  DeclarationFragments Fragments;
+  if (!Decl->getName().empty())
+    Fragments.append(Decl->getName(),
+                     DeclarationFragments::FragmentKind::Identifier);
+  return Fragments;
+} // namespace symbolgraph
+} // namespace clang
Index: clang/lib/SymbolGraph/CMakeLists.txt
--- /dev/null
+++ clang/lib/SymbolGraph/CMakeLists.txt
@@ -0,0 +1,13 @@
+  Support
+  )
+  API.cpp
+  DeclarationFragments.cpp
+  Serialization.cpp
+  clangAST
+  clangBasic
+  )
Index: clang/lib/SymbolGraph/API.cpp
--- /dev/null
+++ clang/lib/SymbolGraph/API.cpp
@@ -0,0 +1,75 @@
+//===- SymbolGraph/API.cpp --------------------------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines SymbolGraph API records.
+#include "clang/SymbolGraph/API.h"
+#include "clang/AST/CommentCommandTraits.h"
+#include "clang/AST/CommentLexer.h"
+#include "clang/AST/RawCommentList.h"
+#include "llvm/Support/Allocator.h"
+namespace clang {
+namespace symbolgraph {
+GlobalRecord *
+API::addGlobal(GVKind Kind, StringRef Name, StringRef USR, PresumedLoc Loc,
+               const AvailabilityInfo &Availability, LinkageInfo Linkage,
+               const DocComment &Comment, DeclarationFragments Fragments,
+               DeclarationFragments SubHeading, FunctionSignature Signature) {
+  auto Result = Globals.insert({Name, nullptr});
+  if (Result.second) {
+    USR = copyString(USR);
+    GlobalRecord *Record = new (Allocator)
+        GlobalRecord{Kind,    Name,    USR,       Loc,        Availability,
+                     Linkage, Comment, Fragments, SubHeading, Signature};
+    Result.first->second = Record;
+  }
+  return Result.first->second;
+GlobalRecord *API::addGlobalVar(StringRef Name, StringRef USR, PresumedLoc Loc,
+                                const AvailabilityInfo &Availability,
+                                LinkageInfo Linkage, const DocComment &Comment,
+                                DeclarationFragments Fragments,
+                                DeclarationFragments SubHeading) {
+  return addGlobal(GVKind::Variable, Name, USR, Loc, Availability, Linkage,
+                   Comment, Fragments, SubHeading, {});
+GlobalRecord *API::addFunction(StringRef Name, StringRef USR, PresumedLoc Loc,
+                               const AvailabilityInfo &Availability,
+                               LinkageInfo Linkage, const DocComment &Comment,
+                               DeclarationFragments Fragments,
+                               DeclarationFragments SubHeading,
+                               FunctionSignature Signature) {
+  return addGlobal(GVKind::Function, Name, USR, Loc, Availability, Linkage,
+                   Comment, Fragments, SubHeading, Signature);
+StringRef API::copyString(StringRef String, llvm::BumpPtrAllocator &Allocator) {
+  if (String.empty())
+    return {};
+  if (Allocator.identifyObject(String.data()))
+    return String;
+  void *Ptr = Allocator.Allocate(String.size(), 1);
+  memcpy(Ptr, String.data(), String.size());
+  return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+StringRef API::copyString(StringRef String) {
+  return copyString(String, Allocator);
+} // namespace symbolgraph
+} // namespace clang
Index: clang/lib/Frontend/ExtractAPIConsumer.cpp
--- clang/lib/Frontend/ExtractAPIConsumer.cpp
+++ clang/lib/Frontend/ExtractAPIConsumer.cpp
@@ -1,32 +1,206 @@
+//===- ExtractAPIConsumer.cpp -----------------------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines the ExtractAPI AST visitor to collect API information.
 #include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/ParentMapContext.h"
+#include "clang/AST/RawCommentList.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Frontend/ASTConsumers.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendActions.h"
+#include "clang/Index/USRGeneration.h"
+#include "clang/SymbolGraph/API.h"
+#include "clang/SymbolGraph/AvailabilityInfo.h"
+#include "clang/SymbolGraph/DeclarationFragments.h"
+#include "clang/SymbolGraph/Serialization.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace clang;
+using namespace symbolgraph;
 namespace {
 class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
-  bool VisitNamedDecl(NamedDecl *Decl) {
-    llvm::outs() << Decl->getName() << "\n";
+  explicit ExtractAPIVisitor(ASTContext &Context)
+      : Context(Context),
+        API(Context.getTargetInfo().getTriple(), Context.getLangOpts()) {}
+  const API &getAPI() const { return API; }
+  bool VisitVarDecl(const VarDecl *Decl) {
+    // Skip function parameters.
+    if (isa<ParmVarDecl>(Decl))
+      return true;
+    // Skip non-global variables in records (struct/union/class).
+    if (Decl->getDeclContext()->isRecord())
+      return true;
+    // Skip local variables inside function or method.
+    if (!Decl->isDefinedOutsideFunctionOrMethod())
+      return true;
+    // Skip a template that is not a specialization or an instantiation.
+    if (Decl->getASTContext().getTemplateOrSpecializationInfo(Decl) &&
+        Decl->getTemplateSpecializationKind() == TSK_Undeclared)
+      return true;
+    StringRef Name = Decl->getName();
+    SmallString<128> USR;
+    index::generateUSRForDecl(Decl, USR);
+    PresumedLoc Loc =
+        Context.getSourceManager().getPresumedLoc(Decl->getLocation());
+    AvailabilityInfo Availability = getAvailability(Decl);
+    LinkageInfo Linkage = Decl->getLinkageAndVisibility();
+    DocComment Comment;
+    if (auto *RawComment = Context.getRawCommentForDeclNoCache(Decl))
+      Comment = RawComment->getFormattedLines(Context.getSourceManager(),
+                                              Context.getDiagnostics());
+    DeclarationFragments Declaration =
+        DeclarationFragmentsBuilder::getFragmentsForVar(Decl);
+    DeclarationFragments SubHeading =
+        DeclarationFragmentsBuilder::getSubHeading(Decl);
+    API.addGlobalVar(Name, USR, Loc, Availability, Linkage, Comment,
+                     Declaration, SubHeading);
     return true;
+  /// Record a function declaration in the collected API.
+  ///
+  /// The following declarations are skipped: member functions of class
+  /// templates, methods that have a C++ record among their parent nodes,
+  /// constructors and destructors, function templates, and template
+  /// specializations that are not explicit instantiations or specializations.
+  ///
+  /// \returns true in all cases.
+  bool VisitFunctionDecl(const FunctionDecl *Decl) {
+    if (const auto *MD = dyn_cast<CXXMethodDecl>(Decl)) {
+      // Skip member functions of class templates.
+      if (MD->getParent()->getDescribedClassTemplate())
+        return true;
+
+      // Skip methods in records.
+      for (auto Parent : Context.getParents(*MD))
+        if (Parent.get<CXXRecordDecl>())
+          return true;
+
+      // Skip constructors and destructors.
+      if (isa<CXXConstructorDecl, CXXDestructorDecl>(MD))
+        return true;
+    }
+
+    // Skip templated functions.
+    switch (Decl->getTemplatedKind()) {
+    case FunctionDecl::TK_FunctionTemplate:
+    case FunctionDecl::TK_DependentFunctionTemplateSpecialization:
+      return true;
+    case FunctionDecl::TK_MemberSpecialization:
+    case FunctionDecl::TK_FunctionTemplateSpecialization: {
+      // Only explicit instantiations or specializations are kept.
+      const auto *SpecInfo = Decl->getTemplateSpecializationInfo();
+      if (SpecInfo && !SpecInfo->isExplicitInstantiationOrSpecialization())
+        return true;
+      break;
+    }
+    case FunctionDecl::TK_NonTemplate:
+      break;
+    }
+
+    // Collect the symbol information.
+    StringRef Name = Decl->getName();
+    SmallString<128> USR;
+    index::generateUSRForDecl(Decl, USR);
+    SourceManager &SM = Context.getSourceManager();
+    PresumedLoc Loc = SM.getPresumedLoc(Decl->getLocation());
+    AvailabilityInfo Availability = getAvailability(Decl);
+    LinkageInfo Linkage = Decl->getLinkageAndVisibility();
+
+    // The documentation comment stays empty when no raw comment is attached.
+    DocComment Comment;
+    if (auto *Raw = Context.getRawCommentForDeclNoCache(Decl))
+      Comment = Raw->getFormattedLines(SM, Context.getDiagnostics());
+
+    // Build declaration fragments, sub-heading, and signature of the function.
+    DeclarationFragments Declaration =
+        DeclarationFragmentsBuilder::getFragmentsForFunction(Decl);
+    DeclarationFragments SubHeading =
+        DeclarationFragmentsBuilder::getSubHeading(Decl);
+    FunctionSignature Signature =
+        DeclarationFragmentsBuilder::getFunctionSignature(Decl);
+
+    API.addFunction(Name, USR, Loc, Availability, Linkage, Comment, Declaration,
+                    SubHeading, Signature);
+    return true;
+  }
+  /// Collect availability information for \p D across all redeclarations.
+  ///
+  /// For each redeclaration, the first availability attribute whose platform
+  /// name equals the target platform name replaces the result gathered so
+  /// far. Non-implicit unavailable and deprecated attributes on the
+  /// redeclaration then set the corresponding flags.
+  AvailabilityInfo getAvailability(const Decl *D) const {
+    const StringRef TargetPlatform = Context.getTargetInfo().getPlatformName();
+
+    AvailabilityInfo Result;
+    for (const auto *Redecl : D->redecls()) {
+      for (const auto *PlatformAttr :
+           Redecl->specific_attrs<AvailabilityAttr>()) {
+        if (PlatformAttr->getPlatform()->getName() == TargetPlatform) {
+          Result = AvailabilityInfo(
+              PlatformAttr->getIntroduced(), PlatformAttr->getDeprecated(),
+              PlatformAttr->getObsoleted(), PlatformAttr->getUnavailable(),
+              /*UD=*/false);
+          break;
+        }
+      }
+
+      const auto *UnavailableMark = Redecl->getAttr<UnavailableAttr>();
+      if (UnavailableMark && !UnavailableMark->isImplicit())
+        Result.Unavailable = true;
+
+      const auto *DeprecatedMark = Redecl->getAttr<DeprecatedAttr>();
+      if (DeprecatedMark && !DeprecatedMark->isImplicit())
+        Result.UnconditionallyDeprecated = true;
+    }
+    return Result;
+  }
+  ASTContext &Context;
 class ExtractAPIConsumer : public ASTConsumer {
+  explicit ExtractAPIConsumer(ASTContext &Context,
+                              std::unique_ptr<raw_pwrite_stream> OS)
+      : Visitor(Context), OS(std::move(OS)) {}
   void HandleTranslationUnit(ASTContext &Context) override {
+    Serializer Serializer(Visitor.getAPI());
+    Serializer.serialize(*OS);
   ExtractAPIVisitor Visitor;
+  std::unique_ptr<raw_pwrite_stream> OS;
 } // namespace
 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
-  return std::make_unique<ExtractAPIConsumer>();
+  std::unique_ptr<raw_pwrite_stream> OS = CreateOutputFile(CI, InFile);
+  if (!OS)
+    return nullptr;
+  return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
+                                              std::move(OS));
+ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) {
+  std::unique_ptr<raw_pwrite_stream> OS =
+      CI.createDefaultOutputFile(/*Binary=*/false, InFile, /*Extension=*/"json",
+                                 /*RemoveFileOnSignal=*/false);
+  if (!OS)
+    return nullptr;
+  return OS;
Index: clang/lib/Frontend/CMakeLists.txt
--- clang/lib/Frontend/CMakeLists.txt
+++ clang/lib/Frontend/CMakeLists.txt
@@ -50,8 +50,10 @@
+  clangIndex
+  clangSymbolGraph
Index: clang/lib/CMakeLists.txt
--- clang/lib/CMakeLists.txt
+++ clang/lib/CMakeLists.txt
@@ -23,6 +23,7 @@
Index: clang/lib/AST/RawCommentList.cpp
--- clang/lib/AST/RawCommentList.cpp
+++ clang/lib/AST/RawCommentList.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/CommentSema.h"
 #include "clang/Basic/CharInfo.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Allocator.h"
 using namespace clang;
@@ -362,6 +363,27 @@
   if (CommentText.empty())
     return "";
+  auto DropTrailingNewLines = [](std::string &Str) {
+    while (!Str.empty() && Str.back() == '\n')
+      Str.pop_back();
+  };
+  std::string Result;
+  for (const RawComment::CommentLine &Line :
+       getFormattedLines(SourceMgr, Diags))
+    Result += Line.Text + "\n";
+  DropTrailingNewLines(Result);
+  return Result;
+RawComment::getFormattedLines(const SourceManager &SourceMgr,
+                              DiagnosticsEngine &Diags) const {
+  llvm::StringRef CommentText = getRawText(SourceMgr);
+  if (CommentText.empty())
+    return {};
   llvm::BumpPtrAllocator Allocator;
   // We do not parse any commands, so CommentOptions are ignored by
   // comments::Lexer. Therefore, we just use default-constructed options.
@@ -371,13 +393,23 @@
                     CommentText.begin(), CommentText.end(),
-  std::string Result;
+  std::vector<RawComment::CommentLine> Result;
   // A column number of the first non-whitespace token in the comment text.
   // We skip whitespace up to this column, but keep the whitespace after this
   // column. IndentColumn is calculated when lexing the first line and reused
   // for the rest of lines.
   unsigned IndentColumn = 0;
+  // Record the line number of the last processed comment line.
+  // For block-style comments, an extra newline token will be produced after
+  // the end-comment marker, e.g.:
+  //   /** This is a multi-line comment block.
+  //       The lexer will produce two newline tokens here > */
+  // PreviousLine will record the line number when we previously saw a newline
+  // token and recorded a comment line. If we see another newline token on the
+  // same line, don't record anything in between.
+  unsigned PreviousLine = 0;
   // Processes one line of the comment and adds it to the result.
   // Handles skipping the indent at the start of the line.
   // Returns false when eof is reached and true otherwise.
@@ -389,9 +421,14 @@
     if (Tok.is(comments::tok::eof))
       return false;
     if (Tok.is(comments::tok::newline)) {
-      Result += "\n";
+      PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation());
+      if (Loc.getLine() != PreviousLine) {
+        Result.emplace_back("", Loc, Loc);
+        PreviousLine = Loc.getLine();
+      }
       return true;
+    std::string Line;
     llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
     bool LocInvalid = false;
     unsigned TokColumn =
@@ -417,32 +454,35 @@
                   std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
     llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
-    Result += Trimmed;
+    Line += Trimmed;
+    // Get the beginning location of the adjusted comment line.
+    PresumedLoc Begin =
+        SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen));
     // Lex all tokens in the rest of the line.
     for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
       if (Tok.is(comments::tok::newline)) {
-        Result += "\n";
+        // Get the ending location of the comment line.
+        PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
+        if (End.getLine() != PreviousLine) {
+          Result.emplace_back(Line, Begin, End);
+          PreviousLine = End.getLine();
+        }
         return true;
-      Result += L.getSpelling(Tok, SourceMgr);
+      Line += L.getSpelling(Tok, SourceMgr);
+    PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
+    Result.emplace_back(Line, Begin, End);
     // We've reached the end of file token.
     return false;
-  auto DropTrailingNewLines = [](std::string &Str) {
-    while (!Str.empty() && Str.back() == '\n')
-      Str.pop_back();
-  };
   // Process first line separately to remember indent for the following lines.
-  if (!LexLine(/*IsFirstLine=*/true)) {
-    DropTrailingNewLines(Result);
+  if (!LexLine(/*IsFirstLine=*/true))
     return Result;
-  }
   // Process the rest of the lines.
   while (LexLine(/*IsFirstLine=*/false))
-  DropTrailingNewLines(Result);
   return Result;
Index: clang/include/clang/SymbolGraph/Serialization.h
--- /dev/null
+++ clang/include/clang/SymbolGraph/Serialization.h
@@ -0,0 +1,54 @@
+//===- SymbolGraph/Serialization.h ------------------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines the SymbolGraph serializer and parser.
+#include "clang/SymbolGraph/API.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/raw_ostream.h"
+namespace clang {
+namespace symbolgraph {
+struct SerializerOption {
+  bool Compact;
+class Serializer {
+  Serializer(const API &API, SerializerOption Options = {})
+      : API(API), Options(Options) {}
+  llvm::json::Object serialize();
+  void serialize(llvm::raw_ostream &os);
+  llvm::json::Object serializeMetadata() const;
+  llvm::json::Object serializeModule() const;
+  llvm::json::Object serializeAPIRecord(const APIRecord &Record) const;
+  void serializeGlobalRecord(const GlobalRecord &Record);
+  const API &API;
+  SerializerOption Options;
+  llvm::json::Array Symbols;
+  llvm::json::Array Relationships;
+  static const llvm::VersionTuple FormatVersion;
+} // namespace symbolgraph
+} // namespace clang
Index: clang/include/clang/SymbolGraph/DeclarationFragments.h
--- /dev/null
+++ clang/include/clang/SymbolGraph/DeclarationFragments.h
@@ -0,0 +1,140 @@
+//===- SymbolGraph/DeclarationFragments.h -----------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines SymbolGraph Declaration Fragments related classes.
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "llvm/ADT/StringRef.h"
+#include <utility>
+#include <vector>
+namespace clang {
+namespace symbolgraph {
+class DeclarationFragments {
+  DeclarationFragments() = default;
+  enum class FragmentKind {
+    None,
+    Keyword,
+    Attribute,
+    NumberLiteral,
+    StringLiteral,
+    Identifier,
+    TypeIdentifier,
+    GenericParameter,
+    ExternalParam,
+    InternalParam,
+    Text,
+  };
+  struct Fragment {
+    std::string Spelling;
+    FragmentKind Kind;
+    std::string PreciseIdentifier;
+    Fragment(StringRef Spelling, FragmentKind Kind, StringRef PreciseIdentifier)
+        : Spelling(Spelling), Kind(Kind), PreciseIdentifier(PreciseIdentifier) {
+    }
+  };
+  const std::vector<Fragment> &getFragments() const { return Fragments; }
+  /// Append a fragment with the given \p Spelling and \p Kind.
+  ///
+  /// A text fragment that directly follows another text fragment is folded
+  /// into that fragment's spelling instead of being stored separately; in that
+  /// case \p PreciseIdentifier is not used.
+  ///
+  /// \returns a reference to this object so that calls can be chained.
+  DeclarationFragments &append(StringRef Spelling, FragmentKind Kind,
+                               StringRef PreciseIdentifier = "") {
+    const bool ExtendsTrailingText =
+        Kind == FragmentKind::Text && !Fragments.empty() &&
+        Fragments.back().Kind == FragmentKind::Text;
+    if (!ExtendsTrailingText) {
+      Fragments.emplace_back(Spelling, Kind, PreciseIdentifier);
+      return *this;
+    }
+    Fragments.back().Spelling.append(Spelling.data(), Spelling.size());
+    return *this;
+  }
+  /// Move every fragment of \p Other onto the end of this object and leave
+  /// \p Other empty.
+  ///
+  /// \returns a reference to this object so that calls can be chained.
+  DeclarationFragments &append(DeclarationFragments &&Other) {
+    std::vector<Fragment> &Source = Other.Fragments;
+    Fragments.insert(Fragments.end(), std::make_move_iterator(Source.begin()),
+                     std::make_move_iterator(Source.end()));
+    Source.clear();
+    return *this;
+  }
+  DeclarationFragments &appendSpace();
+  static StringRef getFragmentKindString(FragmentKind Kind);
+  static FragmentKind parseFragmentKindFromString(StringRef S);
+  std::vector<Fragment> Fragments;
+class FunctionSignature {
+  FunctionSignature() = default;
+  /// A single function parameter: its name and the declaration fragments
+  /// that describe it.
+  struct Parameter {
+    std::string Name;
+    DeclarationFragments Fragments;
+
+    /// \p Fragments is taken by value and moved into the member so that the
+    /// fragment list is not copied a second time.
+    Parameter(StringRef Name, DeclarationFragments Fragments)
+        : Name(Name), Fragments(std::move(Fragments)) {}
+  };
+  const std::vector<Parameter> &getParameters() const { return Parameters; }
+  const DeclarationFragments &getReturnType() const { return ReturnType; }
+  /// Append a parameter named \p Name described by \p Fragments.
+  ///
+  /// \returns a reference to this signature so that calls can be chained.
+  FunctionSignature &addParameter(StringRef Name,
+                                  DeclarationFragments Fragments) {
+    // Fragments is a by-value sink parameter; hand it on instead of copying.
+    Parameters.emplace_back(Name, std::move(Fragments));
+    return *this;
+  }
+  /// Set the declaration fragments describing the return type. \p RT is taken
+  /// by value and moved into place.
+  void setReturnType(DeclarationFragments RT) { ReturnType = std::move(RT); }
+  bool empty() const {
+    return Parameters.empty() && ReturnType.getFragments().empty();
+  }
+  std::vector<Parameter> Parameters;
+  DeclarationFragments ReturnType;
+class DeclarationFragmentsBuilder {
+  static DeclarationFragments getFragmentsForVar(const VarDecl *);
+  static DeclarationFragments getFragmentsForFunction(const FunctionDecl *);
+  static DeclarationFragments getSubHeading(const NamedDecl *);
+  static FunctionSignature getFunctionSignature(const FunctionDecl *);
+  DeclarationFragmentsBuilder() = delete;
+  static DeclarationFragments getFragmentsForType(const QualType, ASTContext &,
+                                                  DeclarationFragments &);
+  static DeclarationFragments getFragmentsForType(const Type *, ASTContext &,
+                                                  DeclarationFragments &);
+  static DeclarationFragments getFragmentsForNNS(const NestedNameSpecifier *,
+                                                 ASTContext &,
+                                                 DeclarationFragments &);
+  static DeclarationFragments getFragmentsForQualifiers(const Qualifiers quals);
+  static DeclarationFragments getFragmentsForParam(const ParmVarDecl *);
+} // namespace symbolgraph
+} // namespace clang
Index: clang/include/clang/SymbolGraph/AvailabilityInfo.h
--- /dev/null
+++ clang/include/clang/SymbolGraph/AvailabilityInfo.h
@@ -0,0 +1,60 @@
+//===- SymbolGraph/AvailabilityInfo.h - Availability Info -------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines the Availability Info for a declaration.
+#include "llvm/Support/Error.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/raw_ostream.h"
+using llvm::VersionTuple;
+namespace clang {
+namespace symbolgraph {
+struct AvailabilityInfo {
+  VersionTuple Introduced;
+  VersionTuple Deprecated;
+  VersionTuple Obsoleted;
+  bool Unavailable{false};
+  bool UnconditionallyDeprecated{false};
+  explicit AvailabilityInfo(bool Unavailable = false)
+      : Unavailable(Unavailable) {}
+  AvailabilityInfo(VersionTuple I, VersionTuple D, VersionTuple O, bool U,
+                   bool UD)
+      : Introduced(I), Deprecated(D), Obsoleted(O), Unavailable(U),
+        UnconditionallyDeprecated(UD) {}
+  bool isDefault() const { return *this == AvailabilityInfo(); }
+  bool isUnavailable() const { return Unavailable; }
+  bool isUnconditionallyDeprecated() const { return UnconditionallyDeprecated; }
+  friend bool operator==(const AvailabilityInfo &Lhs,
+                         const AvailabilityInfo &Rhs);
+inline bool operator==(const AvailabilityInfo &Lhs,
+                       const AvailabilityInfo &Rhs) {
+  return std::tie(Lhs.Introduced, Lhs.Deprecated, Lhs.Obsoleted,
+                  Lhs.Unavailable, Lhs.UnconditionallyDeprecated) ==
+         std::tie(Rhs.Introduced, Rhs.Deprecated, Rhs.Obsoleted,
+                  Rhs.Unavailable, Rhs.UnconditionallyDeprecated);
+} // namespace symbolgraph
+} // namespace clang
Index: clang/include/clang/SymbolGraph/API.h
--- /dev/null
+++ clang/include/clang/SymbolGraph/API.h
@@ -0,0 +1,132 @@
+//===- SymbolGraph/API.h ----------------------------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+/// \file
+/// \brief Defines SymbolGraph API records.
+#include "clang/AST/Decl.h"
+#include "clang/AST/RawCommentList.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/SymbolGraph/AvailabilityInfo.h"
+#include "clang/SymbolGraph/DeclarationFragments.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+namespace clang {
+namespace symbolgraph {
+using DocComment = std::vector<RawComment::CommentLine>;
+struct APIRecord {
+  StringRef Name;
+  StringRef USR;
+  PresumedLoc Location;
+  AvailabilityInfo Availability;
+  LinkageInfo Linkage;
+  DocComment Comment;
+  DeclarationFragments Declaration;
+  DeclarationFragments SubHeading;
+  /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
+  enum RecordKind {
+    RK_Global,
+  };
+  const RecordKind Kind;
+  RecordKind getKind() const { return Kind; }
+  APIRecord(RecordKind Kind, StringRef Name, StringRef USR,
+            PresumedLoc Location, const AvailabilityInfo &Availability,
+            LinkageInfo Linkage, const DocComment &Comment,
+            DeclarationFragments Declaration, DeclarationFragments SubHeading)
+      : Name(Name), USR(USR), Location(Location), Availability(Availability),
+        Linkage(Linkage), Comment(Comment), Declaration(Declaration),
+        SubHeading(SubHeading), Kind(Kind) {}
+enum class GVKind : uint8_t {
+  Unknown = 0,
+  Variable = 1,
+  Function = 2,
+struct GlobalRecord : APIRecord {
+  GVKind GlobalKind;
+  FunctionSignature Signature;
+  GlobalRecord(GVKind Kind, StringRef Name, StringRef USR, PresumedLoc Loc,
+               const AvailabilityInfo &Availability, LinkageInfo Linkage,
+               const DocComment &Comment, DeclarationFragments Declaration,
+               DeclarationFragments SubHeading, FunctionSignature Signature)
+      : APIRecord(RK_Global, Name, USR, Loc, Availability, Linkage, Comment,
+                  Declaration, SubHeading),
+        GlobalKind(Kind), Signature(Signature) {}
+  static bool classof(const APIRecord *Record) {
+    return Record->getKind() == RK_Global;
+  }
+class API {
+  API(const llvm::Triple &Target, const LangOptions &LangOpts)
+      : Target(Target), LangOpts(LangOpts) {}
+  const llvm::Triple &getTarget() const { return Target; }
+  const LangOptions &getLangOpts() const { return LangOpts; }
+  GlobalRecord *addGlobal(GVKind Kind, StringRef Name, StringRef USR,
+                          PresumedLoc Loc, const AvailabilityInfo &Availability,
+                          LinkageInfo Linkage, const DocComment &Comment,
+                          DeclarationFragments Declaration,
+                          DeclarationFragments SubHeading,
+                          FunctionSignature Signature);
+  GlobalRecord *addGlobalVar(StringRef Name, StringRef USR, PresumedLoc Loc,
+                             const AvailabilityInfo &Availability,
+                             LinkageInfo Linkage, const DocComment &Comment,
+                             DeclarationFragments Declaration,
+                             DeclarationFragments SubHeading);
+  GlobalRecord *addFunction(StringRef Name, StringRef USR, PresumedLoc Loc,
+                            const AvailabilityInfo &Availability,
+                            LinkageInfo Linkage, const DocComment &Comment,
+                            DeclarationFragments Declaration,
+                            DeclarationFragments SubHeading,
+                            FunctionSignature Signature);
+  StringRef copyString(StringRef String, llvm::BumpPtrAllocator &Allocator);
+  StringRef copyString(StringRef String);
+  using GlobalRecordMap = llvm::MapVector<StringRef, GlobalRecord *>;
+  const GlobalRecordMap &getGlobals() const { return Globals; }
+  llvm::BumpPtrAllocator Allocator;
+  const llvm::Triple Target;
+  const LangOptions LangOpts;
+  GlobalRecordMap Globals;
+} // namespace symbolgraph
+} // namespace clang
Index: clang/include/clang/Frontend/FrontendActions.h
--- clang/include/clang/Frontend/FrontendActions.h
+++ clang/include/clang/Frontend/FrontendActions.h
@@ -275,6 +275,10 @@
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override;
+  static std::unique_ptr<llvm::raw_pwrite_stream>
+  CreateOutputFile(CompilerInstance &CI, StringRef InFile);
Index: clang/include/clang/AST/RawCommentList.h
--- clang/include/clang/AST/RawCommentList.h
+++ clang/include/clang/AST/RawCommentList.h
@@ -139,6 +139,21 @@
   std::string getFormattedText(const SourceManager &SourceMgr,
                                DiagnosticsEngine &Diags) const;
+  struct CommentLine {
+    std::string Text;
+    PresumedLoc Begin;
+    PresumedLoc End;
+    CommentLine(StringRef Text, PresumedLoc Begin, PresumedLoc End)
+        : Text(Text), Begin(Begin), End(End) {}
+  };
+  /// Returns sanitized comment text as separated lines with locations in
+  /// source, suitable for further processing and rendering requiring source
+  /// locations.
+  std::vector<CommentLine> getFormattedLines(const SourceManager &SourceMgr,
+                                             DiagnosticsEngine &Diags) const;
   /// Parse the comment, assuming it is attached to decl \c D.
   comments::FullComment *parse(const ASTContext &Context,
                                const Preprocessor *PP, const Decl *D) const;
cfe-commits mailing list

Reply via email to