usaxena95 updated this revision to Diff 291205. usaxena95 added a comment. Added README.md for the code completion model.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D83814/new/ https://reviews.llvm.org/D83814 Files: clang-tools-extra/clangd/CMakeLists.txt clang-tools-extra/clangd/for-review-only/CompletionModel.cpp clang-tools-extra/clangd/for-review-only/CompletionModel.h clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h clang-tools-extra/clangd/quality/CompletionModel.cmake clang-tools-extra/clangd/quality/CompletionModelCodegen.py clang-tools-extra/clangd/quality/README.md clang-tools-extra/clangd/quality/model/features.json clang-tools-extra/clangd/quality/model/forest.json clang-tools-extra/clangd/unittests/CMakeLists.txt clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp clang-tools-extra/clangd/unittests/DecisionForestTests.cpp clang-tools-extra/clangd/unittests/model/CategoricalFeature.h clang-tools-extra/clangd/unittests/model/features.json clang-tools-extra/clangd/unittests/model/forest.json
Index: clang-tools-extra/clangd/unittests/model/forest.json =================================================================== --- /dev/null +++ clang-tools-extra/clangd/unittests/model/forest.json @@ -0,0 +1,52 @@ +[ + { + "operation": "if_greater", + "feature": "ANumber", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "AFloat", + "threshold": -1, + "then": { + "operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "C" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": -4.0 + } + } + }, + { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "B" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] \ No newline at end of file Index: clang-tools-extra/clangd/unittests/model/features.json =================================================================== --- /dev/null +++ clang-tools-extra/clangd/unittests/model/features.json @@ -0,0 +1,16 @@ +[ + { + "name": "ANumber", + "type": "NUMBER" + }, + { + "name": "AFloat", + "type": "NUMBER" + }, + { + "name": "ACategorical", + "type": "ENUM", + "enum": "ns1::ns2::TestEnum", + "header": "model/CategoricalFeature.h" + } +] \ No newline at end of file Index: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h =================================================================== --- /dev/null +++ clang-tools-extra/clangd/unittests/model/CategoricalFeature.h @@ -0,0 +1,5 @@ +namespace ns1 { +namespace ns2 { +enum TestEnum { A, B, C, D }; +} // namespace ns2 +} // namespace ns1 Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp @@ -0,0 +1,29 @@ 
+#include "DecisionForestRuntimeTest.h" +#include "model/CategoricalFeature.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { + +TEST(DecisionForestRuntime, Evaluate) { + using Example = ::ns1::ns2::test::Example; + using Cat = ::ns1::ns2::TestEnum; + using ::ns1::ns2::test::Evaluate; + + Example E; + E.setANumber(200); // Tree 0 root: 200 >= 200.0 is true. + E.setAFloat(0); // Tree 0: 0 >= -1 is true: +10.0 + E.setACategorical(Cat::A); // Tree 1: A is in {A, B}: +5.0 + EXPECT_EQ(Evaluate(E), 15.0); + + E.setANumber(200); // Tree 0 root: 200 >= 200.0 is true. + E.setAFloat(-2.5); // Tree 0: -2.5 >= -1 is false: -20.0 + E.setACategorical(Cat::B); // Tree 1: B is in {A, B}: +5.0 + EXPECT_EQ(Evaluate(E), -15.0); + + E.setANumber(100); // Tree 0 root: 100 >= 200.0 is false. + E.setACategorical(Cat::C); // Tree 0: C is in {A, C}: +3.0; tree 1: C is not in {A, B}: -6.0 + EXPECT_EQ(Evaluate(E), -3.0); +} +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -10,6 +10,7 @@ #include "ClangdServer.h" #include "CodeComplete.h" #include "Compiler.h" +#include "CompletionModel.h" #include "Matchers.h" #include "Protocol.h" #include "Quality.h" @@ -47,6 +48,7 @@ using ::testing::IsEmpty; using ::testing::Not; using ::testing::UnorderedElementsAre; +using ContextKind = CodeCompletionContext::Kind; // GMock helpers for matching completion items. 
MATCHER_P(Named, Name, "") { return arg.Name == Name; } @@ -161,6 +163,16 @@ return S; } +TEST(DecisionForestRuntime, SanityTest) { + using Example = clangd::Example; + using clangd::Evaluate; + Example E1; + E1.setContextKind(ContextKind::CCC_ArrowMemberAccess); + Example E2; + E2.setContextKind(ContextKind::CCC_SymbolOrNewName); + EXPECT_GT(Evaluate(E1), Evaluate(E2)); +} + TEST(CompletionTest, Limit) { clangd::CodeCompleteOptions Opts; Opts.Limit = 2; Index: clang-tools-extra/clangd/unittests/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/unittests/CMakeLists.txt +++ clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -28,6 +28,9 @@ set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp) endif() +include(${CMAKE_CURRENT_SOURCE_DIR}/../quality/CompletionModel.cmake) +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/model DecisionForestRuntimeTest ::ns1::ns2::test::Example) + add_custom_target(ClangdUnitTests) add_unittest(ClangdUnitTests ClangdTests Annotations.cpp @@ -44,6 +47,7 @@ ConfigCompileTests.cpp ConfigProviderTests.cpp ConfigYAMLTests.cpp + DecisionForestTests.cpp DexTests.cpp DiagnosticsTests.cpp DraftStoreTests.cpp @@ -88,6 +92,7 @@ TweakTesting.cpp URITests.cpp XRefsTests.cpp + ${DECISION_FOREST_OUTPUT_DIR}/DecisionForestRuntimeTest.cpp support/CancellationTests.cpp support/ContextTests.cpp @@ -102,6 +107,10 @@ $<TARGET_OBJECTS:obj.clangDaemonTweaks> ) +target_include_directories(ClangdTests PUBLIC + $<BUILD_INTERFACE:${DECISION_FOREST_OUTPUT_DIR}> +) + clang_target_link_libraries(ClangdTests PRIVATE clangAST Index: clang-tools-extra/clangd/quality/model/forest.json =================================================================== --- /dev/null +++ clang-tools-extra/clangd/quality/model/forest.json @@ -0,0 +1,18 @@ +[ + { + "operation": "if_member", + "feature": "ContextKind", + "set": [ + "CCC_DotMemberAccess", + "CCC_ArrowMemberAccess" + ], + "then": { + "operation": "boost", + 
"score": 3.0 + }, + "else": { + "operation": "boost", + "score": 1.0 + } + } +] \ No newline at end of file Index: clang-tools-extra/clangd/quality/model/features.json =================================================================== --- /dev/null +++ clang-tools-extra/clangd/quality/model/features.json @@ -0,0 +1,8 @@ +[ + { + "name": "ContextKind", + "type": "ENUM", + "enum": "clang::CodeCompletionContext::Kind", + "header": "clang/Sema/CodeCompleteConsumer.h" + } +] \ No newline at end of file Index: clang-tools-extra/clangd/quality/README.md =================================================================== --- /dev/null +++ clang-tools-extra/clangd/quality/README.md @@ -0,0 +1,245 @@ +# Decision Forest Code Completion Model + +## Decision Forest +A **decision forest** is a collection of many decision trees. A **decision tree** is a full binary tree where every non-leaf node has exactly **2** children. + +In order to predict the relevance of a code completion item, we traverse each of the decision trees beginning with their roots until we reach a leaf. + +At every non-leaf node, we evaluate the condition present in the node. The condition refers to exactly one **feature**. It uses the value of this attribute from the code completion item to evaluate the condition. +Based on the condition, we move to its true child or the false child. + +The condition can be of two types: +- **if_greater**: Check whether a numerical feature is **>=** the **threshold**. +- **if_member**: Check whether the **enum** feature is contained in the **set** defined in the node. + +A leaf node only contains the value **score**. +Once we know the set of leaves (one from each decision tree), we add the **score** values from each of the leaves to get the final relevance score. + +## Model Input Format +The input model is represented in json format. +### Features +The file **features.json** defines the features available to the model. It is a json list of features. 
The features can be of the following two types. +#### Number +``` +{ + "name": "a_numerical_feature", + "type": "NUMBER" +} +``` +#### Enum +``` +{ + "name": "an_enum_feature", + "type": "ENUM", + "enum": "fully::qualified::enum", + "header": "path/to/HeaderDeclaringEnum.h" +} +``` +The field `enum` specifies the fully qualified name of the enum. + +The field `header` specifies the header containing the declaration of the enum. This header is included by the inference runtime. + + +### Decision Forest +The file `forest.json` defines the decision forest. It is a json list of **DecisionTree**. + +**DecisionTree** is one of **IfGreaterNode**, **IfMemberNode**, **LeafNode**. +#### IfGreaterNode +``` +{ + "operation": "if_greater", + "feature": "a_numerical_feature", + "threshold": A real number, + "then": {A DecisionTree}, + "else": {A DecisionTree} +} +``` +#### IfMemberNode +``` +{ + "operation": "if_member", + "feature": "an_enum_feature", + "set": ["enum_value1", "enum_value2", ...], + "then": {A DecisionTree}, + "else": {A DecisionTree} +} +``` +#### LeafNode +``` +{ + "operation": "boost", + "score": A real number +} +``` + +## Code Generator for Inference +The implementation of the inference runtime is split across: +- Build System (CMake) +- Generated code/API for inference +- Code generator +- Input model + +### Code generator +The code generator `CompletionModelCodegen.py` takes the `${model}` directory as input and generates the inference library: +- `${output_dir}/{filename}.h` +- `${output_dir}/{filename}.cpp` + +### Build System +`CompletionModel.cmake` provides the `gen_decision_forest` function. A client intending to use the CompletionModel for inference can call it to trigger the code generator and generate the inference library. The client can then use the generated API by including and depending on this library. + +### Generated API for inference +The code generator defines the Example `class` inside the relevant namespaces, as specified by the option `${cpp_class}`. 
+ +The members of this generated class comprise all the features mentioned in `features.json`. Thus this class can represent a code completion candidate that needs to be scored. + +The API also provides `float Evaluate(const Example&)`, which can be used to score the completion candidate. + + +## Example +### model/features.json +``` +[ + { + "name": "ANumber", + "type": "NUMBER" + }, + { + "name": "AFloat", + "type": "NUMBER" + }, + { + "name": "ACategorical", + "type": "ENUM", + "enum": "ns1::ns2::TestEnum", + "header": "model/CategoricalFeature.h" + } +] +``` +### model/forest.json +``` +[ + { + "operation": "if_greater", + "feature": "ANumber", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "AFloat", + "threshold": -1, + "then": { + "operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "C" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": -4.0 + } + } + }, + { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "B" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] +``` +### DecisionForestRuntime.h +``` +# ifndef GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +# define GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +# include <cstdint> + +namespace ns1 { +namespace ns2 { +namespace test { +class Example { +public: + void setANumber(float V) { ANumber = OrderEncode(V); } + void setAFloat(float V) { AFloat = OrderEncode(V); } + void setACategorical(unsigned V) { ACategorical = 1 << V; } + +private: + uint32_t ANumber = 0; + uint32_t AFloat = 0; + uint32_t ACategorical = 0; + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). 
+ static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; +float Evaluate(const Example&); +} // namespace test +} // namespace ns2 +} // namespace ns1 +#endif // GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +``` +### DecisionForestRuntime.cpp +``` +#include <cstring> +#include <limits> + +#include "llvm/ADT/bit.h" +#include "model/CategoricalFeature.h" +#include "DecisionForestRuntimeTest.h" + +#define BIT(X) (1 << X) + +namespace ns1 { +namespace ns2 { +namespace test { + +using ACategorical_type = ns1::ns2::TestEnum; + +uint32_t Example::OrderEncode(float F) {...} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if(E.ANumber >= 3276275712 /*200.0*/) goto t0_n4; + t0_n1: if(E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::C))) goto t0_n3; + t0_n2: Score += -4.0; goto t1; + t0_n3: Score += 3.0; goto t1; + t0_n4: if(E.AFloat >= 1082130432 /*-1*/) goto t0_n6; + t0_n5: Score += -20.0; goto t1; + t0_n6: Score += 10.0; goto t1; + + t1: + t1_n0: if(E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::B))) goto t1_n2; + t1_n1: Score += -6.0; goto t2; + t1_n2: Score += 5.0; goto t2; + + t2: // No such tree. + return Score; +} +} // namespace test +} // namespace ns2 +} // namespace ns1 +``` \ No newline at end of file Index: clang-tools-extra/clangd/quality/CompletionModelCodegen.py =================================================================== --- /dev/null +++ clang-tools-extra/clangd/quality/CompletionModelCodegen.py @@ -0,0 +1,309 @@ +"""Code generator for Code Completion Model Inference. + +Tool runs on the Decision Forest model defined in {model} directory. +It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp +The generated files defines the Example class named {cpp_class} having all the features as class members. +The generated runtime provides an `Evaluate` function which can be used to score a code completion candidate. 
+""" + +import argparse +import json +import struct +from enum import Enum + + +class CppClass: + """Holds class name and names of the enclosing namespaces.""" + + def __init__(self, cpp_class): + ns_and_class = cpp_class.split("::") + self.ns = [ns for ns in ns_and_class[0:-1] if len(ns) > 0] + self.name = ns_and_class[-1] + if len(self.name) == 0: + raise ValueError("Empty class name.") + + def ns_begin(self): + """Returns snippet for opening namespace declarations.""" + open_ns = ["namespace {ns} {{".format(ns=ns) for ns in self.ns] + return "\n".join(open_ns) + + def ns_end(self): + """Returns snippet for closing namespace declarations.""" + close_ns = [ + "}} // namespace {ns}".format(ns=ns) for ns in reversed(self.ns)] + return "\n".join(close_ns) + + +def header_guard(filename): + '''Returns the header guard for the generated header.''' + return "GENERATED_DECISION_FOREST_MODEL_{}_H".format(filename.upper()) + + +def boost_node(n, label, next_label): + """Returns code snippet for a leaf/boost node. + Adds value of leaf to the score and jumps to the root of the next tree.""" + return "{label}: Score += {score}; goto {next_label};".format( + label=label, + score=n["score"], + next_label=next_label) + + +def if_greater_node(n, label, next_label): + """Returns code snippet for an if_greater node. + Jumps to next_label (the true branch) if the Example feature (NUMBER) is greater than or equal to the threshold. + Comparing integers is much faster than comparing floats. Assuming floating points + are represented as IEEE 754, it order-encodes the floats to integers before comparing them. + Control falls through if condition is evaluated to false.""" + return "{label}: if(E.{feature} >= {encoded} /*{threshold}*/) goto {true_label};".format( + label=label, + feature=n["feature"], + encoded=order_encode(n["threshold"]), + threshold=n["threshold"], + true_label=next_label) + + +def if_member_node(n, label, next_label): + """Returns code snippet for a if_member node. 
+ Jumps to true_label if the Example feature (ENUM) is present in the set of enum values + described in the node. + Control falls through if condition is evaluated to false.""" + members = '|'.join([ + "BIT({feature}_type::{member})".format( + feature=n["feature"], + member=member) + for member in n["set"] + ]) + return "{label}: if(E.{feature} & ({members})) goto {true_label};".format( + label=label, + feature=n["feature"], + members=members, + true_label=next_label) + + +def node(n, label, next_label): + """Returns code snippet for the node.""" + return { + 'boost': boost_node, + 'if_greater': if_greater_node, + 'if_member': if_member_node, + }[n['operation']](n, label, next_label) + + +def tree(t, tree_num: int, node_num: int): + """Returns code for inferencing a Decision Tree. + + A tree starts with its label `t{tree#}`. + A node of the tree starts with label `t{tree#}_n{node#}`. + + The tree contains two types of node: Conditional node and Leaf node. + - Conditional node evaluates a condition. If true, it jumps to the true node/child. + Code is generated using pre-order traversal of the tree considering + false node as the first child. Therefore the false node is always the + immediately next label. + - Leaf node adds the value to the score and jumps to the next tree. 
+ """ + label = "t{tree}_n{node}".format(tree=tree_num, node=node_num) + code = [] + if node_num == 0: + code.append("t{0}:".format(tree_num)) + + if t["operation"] == "boost": + code.append(node(t, label=label, next_label="t{}".format(tree_num+1))) + return code, 1 + + false_code, false_size = tree( + t['else'], tree_num=tree_num, node_num=node_num+1) + + true_node_num = node_num + false_size + 1 + true_label = "t{tree_num}_n{true_node}".format( + tree_num=tree_num, true_node=true_node_num) + + true_code, true_size = tree( + t['then'], tree_num=tree_num, node_num=true_node_num) + + code.append(node(t, label=label, next_label=true_label)) + + return code + false_code + true_code, 1 + false_size+true_size + + +def gen_header_code(features_json: list, cpp_class, filename: str): + """Returns code for header declaring the inference runtime. + + Declares the Example class named {cpp_class} inside relevant namespaces. + The Example class contains all the features as class members. This + class can be used to represent a code completion candidate. + Provides `float Evaluate()` function which can be used to score the Example. + """ + # Header guard + code = """#ifndef {guard} +#define {guard} +#include <cstdint> + +""".format(guard=header_guard(filename)) + + # Namespace begin + code += cpp_class.ns_begin() + "\n" + + setters = [] + for f in features_json: + if f["type"] == "NUMBER": + # Floats are order-encoded to integers for faster comparison. + setters.append("void set{feature}(float V) {{ {feature} = OrderEncode(V); }}".format( + feature=f["name"])) + elif f["type"] == "ENUM": + setters.append("void set{feature}(unsigned V) {{ {feature} = 1 << V; }}".format( + feature=f["name"])) + else: + raise ValueError("Unhandled feature type.", f["type"]) + + # Class members represent all the features of the Example. + class_members = ["uint32_t {feature} = 0;".format( + feature=f["name"]) for f in features_json] + + # Class definition. 
+ code += "class {class_name} {{\n".format(class_name=cpp_class.name) + code += "public:\n" + code += " " + "\n ".join(setters) + "\n" + code += "\n" + code += "private:\n" + code += " " + "\n ".join(class_members) + "\n" + + code += """ + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). + static uint32_t OrderEncode(float F); +""".format(class_name=cpp_class.name) + + code += " friend float Evaluate(const {}&);\n".format(cpp_class.name) + code += "};\n" + code += "float Evaluate(const {}&);".format(cpp_class.name) + "\n" + + # Namespace end and Header guard. + code += cpp_class.ns_end() + "\n" + code += "#endif // {guard}".format(guard=header_guard(filename)) + return code + + +def order_encode(v: float): + i = struct.unpack('<I', struct.pack('<f', v))[0] + TopBit = 1 << 31 + # IEEE 754 floats compare like sign-magnitude integers. + if (i & TopBit): # Negative float + return (1 << 32) - i # low half of integers, order reversed. + return TopBit + i # top half of integers + + +def evaluate_func(forest_json: list): + """Generates code for `float Evaluate(const Example&)` function. + The generated function can be used to score an Example.""" + code = "float Evaluate(const Example& E) {\n" + lines = [] + lines.append("float Score = 0;") + tree_num = 0 + for tree_json in forest_json: + lines.extend(tree(tree_json, tree_num=tree_num, node_num=0)[0]) + lines.append("") + tree_num += 1 + + lines.append("t{}: // No such tree.".format(len(forest_json))) + lines.append("return Score;") + code += " " + "\n ".join(lines) + code += "\n}" + return code + + +def gen_cpp_code(forest_json: list, features_json: list, filename: str, + cpp_class: CppClass): + """Generates code for the .cpp file.""" + code = "" + # Headers + angled_include = [ # Required by OrderEncode(float F). + "cstring", + "limits", + ] + quoted_include = { + "{}.h".format(filename), # Include generated header. 
+ "llvm/ADT/bit.h", + } + # Headers required by ENUM features used by the model. + quoted_include |= {f["header"] + for f in features_json if f["type"] == "ENUM"} + + code = "\n".join('#include <{h}>'.format(h=h) + for h in angled_include) + "\n\n" + code += "\n".join('#include "{h}"'.format(h=h) + for h in quoted_include) + "\n\n" + code += "#define BIT(X) (1 << X)\n\n" + + # Namespaces Begin. + code += cpp_class.ns_begin() + "\n" + + # using-decl for ENUM features. + code += "\n" + "\n".join("using {feature}_type = {enum};".format( + feature=feature["name"], + enum=feature["enum"]) for feature in features_json if feature["type"] == "ENUM") + "\n" + + # Float order encoding. + code += """ +uint32_t {class_name}::OrderEncode(float F) {{ + static_assert(std::numeric_limits<float>::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{{0}} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast<uint32_t>(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +}} + +""".format(class_name=cpp_class.name) + + code += evaluate_func(forest_json) + "\n" + # Namespaces End. + code += cpp_class.ns_end() + "\n" + return code + + +def main(): + parser = argparse.ArgumentParser('DecisionForestCodegen') + parser.add_argument('--filename', help='output file name.') + parser.add_argument('--output_dir', help='output directory') + parser.add_argument('--model', help='path to model directory') + parser.add_argument( + '--cpp_class', + help='The name of the class (which may be a namespace-qualified) created in generated header.' 
+ ) + ns = parser.parse_args() + + output_dir = ns.output_dir + filename = ns.filename + header_file = "{dir}/{filename}.h".format( + dir=output_dir, filename=filename) + cpp_file = "{dir}/{filename}.cpp".format(dir=output_dir, filename=filename) + cpp_class = CppClass(cpp_class=ns.cpp_class) + + model_file = "{dir}/forest.json".format(dir=ns.model) + features_file = "{dir}/features.json".format(dir=ns.model) + + with open(features_file) as f: + features_json = json.load(f) + + with open(model_file) as m: + forest_json = json.load(m) + + with open(cpp_file, 'w+t') as output_cc: + output_cc.write( + gen_cpp_code(forest_json=forest_json, + features_json=features_json, + filename=filename, + cpp_class=cpp_class)) + + with open(header_file, 'w+t') as output_h: + output_h.write(gen_header_code( + features_json=features_json, cpp_class=cpp_class, filename=filename)) + + +if __name__ == '__main__': + main() Index: clang-tools-extra/clangd/quality/CompletionModel.cmake =================================================================== --- /dev/null +++ clang-tools-extra/clangd/quality/CompletionModel.cmake @@ -0,0 +1,42 @@ +# Run the Completion Model Codegenerator on the model present in the +# ${model} directory. +# Produces a pair of files called ${filename}.h and ${filename}.cpp in the +# ${CMAKE_BINARY_DIR}/generated/decision_forest. The generated header +# will define a C++ class called ${cpp_class} - which may be a +# namespace-qualified class name. 
+function(gen_decision_forest model filename cpp_class) + set(model_compiler ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py) + + set(model_json ${model}/forest.json) + set(model_features ${model}/features.json) + + set(output_dir ${CMAKE_BINARY_DIR}/generated/decision_forest) + set(header_file ${output_dir}/${filename}.h) + set(cpp_file ${output_dir}/${filename}.cpp) + + add_custom_command(OUTPUT ${header_file} ${cpp_file} + COMMAND "${Python3_EXECUTABLE}" ${model_compiler} + --model ${model} + --output_dir ${output_dir} + --filename ${filename} + --cpp_class ${cpp_class} + COMMENT "Generating code completion model runtime..." + DEPENDS ${model_compiler} ${model_json} ${model_features} + VERBATIM ) + + set_source_files_properties(${header_file} PROPERTIES + GENERATED 1) + set_source_files_properties(${cpp_file} PROPERTIES + GENERATED 1) + + # Disable unused label warning for generated files. + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + set_source_files_properties(${cpp_file} PROPERTIES + COMPILE_FLAGS /wd4102) + else() + set_source_files_properties(${cpp_file} PROPERTIES + COMPILE_FLAGS -Wno-unused) + endif() + + set(DECISION_FOREST_OUTPUT_DIR ${output_dir} PARENT_SCOPE) +endfunction() Index: clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h =================================================================== --- /dev/null +++ clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h @@ -0,0 +1,28 @@ +#ifndef GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#define GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#include <cstdint> + +namespace ns1 { +namespace ns2 { +namespace test { +class Example { +public: + void setANumber(float V) { ANumber = OrderEncode(V); } + void setAFloat(float V) { AFloat = OrderEncode(V); } + void setACategorical(unsigned V) { ACategorical = 1 << V; } + +private: + uint32_t ANumber = 0; + uint32_t AFloat = 0; + uint32_t ACategorical = 0; + + // 
Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). + static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; +float Evaluate(const Example&); +} // namespace test +} // namespace ns2 +} // namespace ns1 +#endif // GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H \ No newline at end of file Index: clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp @@ -0,0 +1,50 @@ +#include <cstring> +#include <limits> + +#include "model/CategoricalFeature.h" +#include "llvm/ADT/bit.h" +#include "DecisionForestRuntimeTest.h" + +#define BIT(X) (1 << X) + +namespace ns1 { +namespace ns2 { +namespace test { + +using ACategorical_type = ns1::ns2::TestEnum; + +uint32_t Example::OrderEncode(float F) { + static_assert(std::numeric_limits<float>::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast<uint32_t>(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. 
+} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if(E.ANumber >= 3276275712 /*200.0*/) goto t0_n4; + t0_n1: if(E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::C))) goto t0_n3; + t0_n2: Score += -4.0; goto t1; + t0_n3: Score += 3.0; goto t1; + t0_n4: if(E.AFloat >= 1082130432 /*-1*/) goto t0_n6; + t0_n5: Score += -20.0; goto t1; + t0_n6: Score += 10.0; goto t1; + + t1: + t1_n0: if(E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::B))) goto t1_n2; + t1_n1: Score += -6.0; goto t2; + t1_n2: Score += 5.0; goto t2; + + t2: // No such tree. + return Score; +} +} // namespace test +} // namespace ns2 +} // namespace ns1 Index: clang-tools-extra/clangd/for-review-only/CompletionModel.h =================================================================== --- /dev/null +++ clang-tools-extra/clangd/for-review-only/CompletionModel.h @@ -0,0 +1,22 @@ +#ifndef GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H +#define GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H +#include <cstdint> + +namespace clang { +namespace clangd { +class Example { +public: + void setContextKind(unsigned V) { ContextKind = 1 << V; } + +private: + uint32_t ContextKind = 0; + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). 
+ static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; +float Evaluate(const Example&); +} // namespace clangd +} // namespace clang +#endif // GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H \ No newline at end of file Index: clang-tools-extra/clangd/for-review-only/CompletionModel.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/for-review-only/CompletionModel.cpp @@ -0,0 +1,39 @@ +#include <cstring> +#include <limits> + +#include "llvm/ADT/bit.h" +#include "clang/Sema/CodeCompleteConsumer.h" +#include "CompletionModel.h" + +#define BIT(X) (1 << X) + +namespace clang { +namespace clangd { + +using ContextKind_type = clang::CodeCompletionContext::Kind; + +uint32_t Example::OrderEncode(float F) { + static_assert(std::numeric_limits<float>::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast<uint32_t>(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if(E.ContextKind & (BIT(ContextKind_type::CCC_DotMemberAccess)|BIT(ContextKind_type::CCC_ArrowMemberAccess))) goto t0_n2; + t0_n1: Score += 1.0; goto t1; + t0_n2: Score += 3.0; goto t1; + + t1: // No such tree. 
+ return Score; +} +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -28,6 +28,9 @@ FrontendOpenMP Option ) + +include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake) +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model CompletionModel clang::clangd::Example) if(MSVC AND NOT CLANG_CL) set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant @@ -77,6 +80,7 @@ TUScheduler.cpp URI.cpp XRefs.cpp + ${DECISION_FOREST_OUTPUT_DIR}/CompletionModel.cpp index/Background.cpp index/BackgroundIndexLoader.cpp @@ -117,6 +121,10 @@ omp_gen ) +target_include_directories(clangDaemon PUBLIC + $<BUILD_INTERFACE:${DECISION_FOREST_OUTPUT_DIR}> +) + clang_target_link_libraries(clangDaemon PRIVATE clangAST
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits