hokein created this revision.
hokein added a reviewer: sammccall.
Herald added a subscriber: mgorny.
Herald added a project: All.
hokein requested review of this revision.
Herald added a subscriber: alextsao1999.
Herald added a project: clang-tools-extra.

The main idea is to compile the cxx grammar at build time, and construct
the core pieces (Grammar, LRTable) of the pseudoparser based on the compiled
data sources.

This is a tiny implementation, which is good for a start:

- defines what the public API should look like;
- integrates the cxx grammar compilation workflow with the cmake system;
- only nonterminal symbols of the C++ grammar are compiled; everything else is
still doing the real compilation work at runtime, and we can opt in more bits
in the future;
- splits the monolithic clangPseudo library for better layering;


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125667

Files:
  clang-tools-extra/pseudo/CMakeLists.txt
  clang-tools-extra/pseudo/gen/CMakeLists.txt
  clang-tools-extra/pseudo/gen/Main.cpp
  clang-tools-extra/pseudo/gen/cxx_gen.cmake
  clang-tools-extra/pseudo/include/CMakeLists.txt
  clang-tools-extra/pseudo/include/clang-pseudo/cxx/cxx.h
  clang-tools-extra/pseudo/lib/CMakeLists.txt
  clang-tools-extra/pseudo/lib/cxx/cxx.cpp

Index: clang-tools-extra/pseudo/lib/cxx/cxx.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/lib/cxx/cxx.cpp
@@ -0,0 +1,34 @@
+//===--- cxx.cpp - Define public interfaces for C++ grammar ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/cxx/cxx.h"
+#include "clang-pseudo/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+namespace cxx {
+
+static const char *CxxBNF = // Raw string literal generated by pseudo-gen.
+#include "CxxBNF.inc"
+    ;
+
+const Grammar &getGrammar() { // Parsed once, on first call.
+  static std::vector<std::string> Diags;
+  static std::unique_ptr<Grammar> G = Grammar::parseBNF(CxxBNF, Diags);
+  assert(Diags.empty()); // pseudo-gen already rejects diagnostics.
+  return *G;
+}
+
+const LRTable &getLRTable() {
+  static LRTable Table = LRTable::buildSLR(getGrammar()); // Built once.
+  return Table;
+}
+
+} // namespace cxx
+} // namespace pseudo
+} // namespace clang
Index: clang-tools-extra/pseudo/lib/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS Support)
 
-add_clang_library(clangPseudo
+# Needed by LLVM's CMake checks because this file defines multiple targets.
+set(LLVM_OPTIONAL_SOURCES
   DirectiveTree.cpp
   Forest.cpp
   GLR.cpp
@@ -11,8 +12,42 @@
   LRTable.cpp
   LRTableBuild.cpp
   Token.cpp
+  )
+
+add_clang_library(clangPseudoGrammar
+  Grammar.cpp
+  GrammarBNF.cpp
+  LRGraph.cpp
+  LRTable.cpp
+  LRTableBuild.cpp
+
+  # FIXME: can we get rid of the clangBasic dependency? We need it for the
+  # clang::tok::getTokenName and clang::tok::getPunctuatorSpelling functions; we
+  # could consider reimplementing these functions.
+  LINK_LIBS
+  clangBasic
+  )
+
+add_clang_library(clangPseudo
+  DirectiveTree.cpp
+  Forest.cpp
+  GLR.cpp
+  Lex.cpp
+  Token.cpp
 
   LINK_LIBS
   clangBasic
   clangLex
+  clangPseudoGrammar
+  )
+
+include(${CMAKE_CURRENT_SOURCE_DIR}/../gen/cxx_gen.cmake) # Defines cxx_gen.
+add_clang_library(clangPseudoCxx
+  cxx/cxx.cpp
+
+  DEPENDS
+  cxx_gen # Produces the CxxBNF.inc / CxxSymbols.inc files the sources include.
+
+  LINK_LIBS
+  clangPseudoGrammar
   )
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/cxx.h
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/cxx.h
@@ -0,0 +1,51 @@
+//===--- cxx.h - Public interfaces for the C++ grammar -----------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines public interfaces for the C++ grammar
+//  (pseudo/lib/cxx.bnf). It provides a fast way to access core building pieces
+//  of the LR parser, e.g. Grammar, LRTable, rather than parsing the grammar
+//  file at runtime.
+//
+//  We do a compilation of the C++ BNF grammar at build time, and generate
+//  critical data sources. The implementation of the interfaces is based on the
+//  generated data sources.
+//
+//  FIXME: not everything is fully compiled yet. The implementation of the
+//  interfaces still parses the grammar file at runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CXX_CXX_H
+#define CLANG_PSEUDO_CXX_CXX_H
+
+#include "clang-pseudo/Grammar.h"
+
+namespace clang {
+namespace pseudo {
+class LRTable;
+
+namespace cxx {
+// Symbol lists the nonterminals of the C++ grammar as enumerators: each
+// NONTERMINAL(X, Y) entry of the generated CxxSymbols.inc expands to `X = Y`.
+enum Symbol : SymbolID {
+#define NONTERMINAL(X, Y) X = Y,
+#include "CxxSymbols.inc"
+#undef NONTERMINAL
+};
+
+// Returns the C++ grammar.
+const Grammar &getGrammar();
+// Returns the corresponding LRTable for the C++ grammar.
+const LRTable &getLRTable();
+
+} // namespace cxx
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CXX_CXX_H
Index: clang-tools-extra/pseudo/include/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/CMakeLists.txt
@@ -0,0 +1,3 @@
+# We put an empty cmake file here so that cmake can create an include directory
+# in the build directory; the include directory is the home for generated
+# source files.
Index: clang-tools-extra/pseudo/gen/cxx_gen.cmake
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/gen/cxx_gen.cmake
@@ -0,0 +1,29 @@
+# The cxx.bnf grammar file
+set(cxx_bnf ${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxx.bnf)
+
+# Generate inc files.
+set(cxx_symbols_inc ${CLANG_PSEUDO_BINARY_DIR}/include/CxxSymbols.inc)
+add_custom_command(OUTPUT ${cxx_symbols_inc}
+   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
+     --grammar ${cxx_bnf}
+     --emit-symbol-list
+     > ${cxx_symbols_inc}
+   COMMENT "Generating nonterminal symbol file for cxx grammar..."
+   DEPENDS pseudo-gen ${cxx_bnf} # Regenerate when the tool or grammar changes.
+   VERBATIM)
+
+set(cxx_bnf_inc ${CLANG_PSEUDO_BINARY_DIR}/include/CxxBNF.inc)
+add_custom_command(OUTPUT ${cxx_bnf_inc}
+   COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/pseudo-gen"
+     --grammar ${cxx_bnf}
+     --emit-grammar-content
+     > ${cxx_bnf_inc}
+   COMMENT "Generating bnf string file for cxx grammar..."
+   DEPENDS pseudo-gen ${cxx_bnf} # Regenerate when the tool or grammar changes.
+   VERBATIM)
+
+# add_custom_command does not create a new target; we need to define a target
+# explicitly, so that other targets can depend on it.
+add_custom_target(cxx_gen
+    DEPENDS ${cxx_symbols_inc} ${cxx_bnf_inc}
+    VERBATIM)
Index: clang-tools-extra/pseudo/gen/Main.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/gen/Main.cpp
@@ -0,0 +1,79 @@
+//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/Grammar.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+
+using llvm::cl::desc;
+using llvm::cl::init;
+using llvm::cl::opt;
+using llvm::cl::values;
+
+namespace {
+enum EmitType {
+  EmitSymbolList,     // NONTERMINAL(name, id) lines, one per nonterminal.
+  EmitGrammarContent, // The grammar text wrapped in a raw string literal.
+};
+
+opt<std::string> Grammar("grammar", desc("Parse and check a BNF grammar file."),
+                         init("")); // Required: main() rejects its absence.
+opt<EmitType>
+    Emit(desc("which information to emit:"),
+         values(clEnumValN(EmitSymbolList, "emit-symbol-list",
+                           "Print nonterminal symbols (default)"),
+                clEnumValN(EmitGrammarContent, "emit-grammar-content",
+                           "Print the BNF grammar content as a string")));
+std::string readOrDie(llvm::StringRef Path) { // Exits on read failure.
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+      llvm::MemoryBuffer::getFile(Path);
+  if (std::error_code EC = Text.getError()) {
+    llvm::errs() << "Error: can't read grammar file '" << Path
+                 << "': " << EC.message() << "\n";
+    ::exit(1);
+  }
+  return Text.get()->getBuffer().str(); // Copy out of the MemoryBuffer.
+}
+} // namespace
+
+int main(int argc, char *argv[]) { // Writes the requested output to stdout.
+  llvm::cl::ParseCommandLineOptions(argc, argv, "");
+  if (!Grammar.getNumOccurrences()) {
+    llvm::errs() << "Grammar file must be provided!\n";
+    return 1;
+  }
+
+  std::string GrammarText = readOrDie(Grammar);
+  std::vector<std::string> Diags;
+  auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);
+
+  if (!Diags.empty()) { // Refuse to generate output from a bad grammar.
+    llvm::errs() << llvm::join(Diags, "\n") << "\n";
+    return 1;
+  }
+  switch (Emit) {
+
+  case EmitSymbolList: // One NONTERMINAL(name, id) line per nonterminal.
+    for (clang::pseudo::SymbolID ID = 0; ID < G->table().Nonterminals.size();
+         ++ID) {
+      std::string Name = G->symbolName(ID).str();
+      // Make it a valid C++ identifier: translation-unit -> translation_unit
+      std::replace(Name.begin(), Name.end(), '-', '_');
+      llvm::outs() << (llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID));
+    }
+    break;
+  case EmitGrammarContent: // The grammar text as a C++ raw string literal.
+    llvm::outs() << llvm::formatv("R\"bnf(\n{0})bnf\"\n", GrammarText);
+    break;
+  }
+
+  return 0;
+}
Index: clang-tools-extra/pseudo/gen/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/gen/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+add_clang_executable(pseudo-gen # Build-time generator run by cxx_gen.cmake.
+  Main.cpp
+  )
+
+target_link_libraries(pseudo-gen
+  PRIVATE
+  clangPseudoGrammar
+  )
Index: clang-tools-extra/pseudo/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/CMakeLists.txt
+++ clang-tools-extra/pseudo/CMakeLists.txt
@@ -1,5 +1,10 @@
+set(CLANG_PSEUDO_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) # See gen/cxx_gen.cmake.
+
+
 include_directories(include)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
+add_subdirectory(include)
+add_subdirectory(gen)
 add_subdirectory(lib)
 add_subdirectory(tool)
 add_subdirectory(fuzzer)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to