sammccall created this revision.
sammccall added reviewers: hokein, morehouse.
Herald added a subscriber: mgorny.
Herald added a project: All.
sammccall requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

It should be useful for clang-fuzzer itself, though my own motivation is
to use this in fuzzing clang-pseudo (clang-tools-extra/pseudo/fuzzer).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125166

Files:
  clang/test/CMakeLists.txt
  clang/test/Misc/fuzzer-dictionary.test
  clang/tools/clang-fuzzer/CMakeLists.txt
  clang/tools/clang-fuzzer/dictionary/CMakeLists.txt
  clang/tools/clang-fuzzer/dictionary/dictionary.c

Index: clang/tools/clang-fuzzer/dictionary/dictionary.c
===================================================================
--- /dev/null
+++ clang/tools/clang-fuzzer/dictionary/dictionary.c
@@ -0,0 +1,57 @@
+//===-- dictionary.c - Generate fuzzing dictionary for clang --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This binary emits a fuzzing dictionary describing strings that are
+// significant to the clang parser: keywords and other tokens.
+//
+// The dictionary can be used by a fuzzer to reach interesting parser states
+// much more quickly.
+//
+// The output is a single-file dictionary supported by libFuzzer and AFL:
+// https://llvm.org/docs/LibFuzzer.html#dictionaries
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+static void emit(const char *Name, const char *Spelling) { // Name="Spelling"
+  static char Hex[] = "0123456789abcdef"; // Digit table for \xNN escapes.
+
+  printf("%s=\"", Name); // Entry name, equals sign, opening quote.
+  unsigned char C; // Unsigned: keeps C>>4 and C%16 within Hex's 0..15.
+  while ((C = *Spelling++)) { // Stops at the terminating NUL.
+    if (C < 32 || C == '"' || C == '\\') // Control byte, quote, or backslash.
+      printf("\\x%c%c", Hex[C>>4], Hex[C%16]); // Hex-escape as \xNN.
+    else
+      printf("%c", C); // Any other byte is copied verbatim.
+  }
+  printf("\"\n"); // Closing quote and end of line.
+}
+
+int main(int argc, char **argv) { // Args unused; entries go to stdout.
+#define PUNCTUATOR(Name, Spelling) emit(#Name, Spelling); // Given spelling.
+#define KEYWORD(Name, Criteria) emit(#Name, #Name); // Name is the spelling.
+#define PPKEYWORD(Name) emit(#Name, #Name);
+#define CXX_KEYWORD_OPERATOR(Name, Equivalent) emit(#Name, #Name);
+#define OBJC_AT_KEYWORD(Name) emit(#Name, #Name);
+#define ALIAS(Spelling, Equivalent, Criteria) emit(Spelling, Spelling);
+#include "clang/Basic/TokenKinds.def" // Presumably one macro use per token.
+  // Some other sub-token chunks significant to the lexer.
+  emit("ucn16", "\\u0000");
+  emit("ucn32", "\\U00000000");
+  emit("rawstart", "R\"(");
+  emit("rawend", ")\"");
+  emit("quote", "\"");
+  emit("squote", "'");
+  emit("u8quote", "u8\"");
+  emit("u16quote", "u\"");
+  emit("u32quote", "U\"");
+  emit("esc_nl", "\\\n"); // Backslash followed by a newline.
+  emit("hex", "0x");
+} // No explicit return: in C99 and later, main then returns 0.
+
Index: clang/tools/clang-fuzzer/dictionary/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang/tools/clang-fuzzer/dictionary/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_clang_executable(clang-fuzzer-dictionary dictionary.c)
+
Index: clang/tools/clang-fuzzer/CMakeLists.txt
===================================================================
--- clang/tools/clang-fuzzer/CMakeLists.txt
+++ clang/tools/clang-fuzzer/CMakeLists.txt
@@ -109,6 +109,7 @@
 
 add_clang_subdirectory(handle-cxx)
 add_clang_subdirectory(handle-llvm)
+add_clang_subdirectory(dictionary)
 
 add_clang_executable(clang-fuzzer
   EXCLUDE_FROM_ALL
Index: clang/test/Misc/fuzzer-dictionary.test
===================================================================
--- /dev/null
+++ clang/test/Misc/fuzzer-dictionary.test
@@ -0,0 +1,4 @@
+RUN: clang-fuzzer-dictionary | FileCheck %s
+CHECK-DAG: ampamp="&&"
+CHECK-DAG: catch="catch"
+CHECK-DAG: rawstart="R\x22("
Index: clang/test/CMakeLists.txt
===================================================================
--- clang/test/CMakeLists.txt
+++ clang/test/CMakeLists.txt
@@ -58,6 +58,7 @@
   apinotes-test
   c-index-test
   clang
+  clang-fuzzer-dictionary
   clang-resource-headers
   clang-format
   clang-tblgen
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to