================ @@ -0,0 +1,665 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/DebugLog.h" +#include "llvm/Support/StringSaver.h" +#include <cassert> +#include <memory> +#include <string> + +#define DEBUG_TYPE "clang-doc" + +using namespace llvm; + +namespace clang::doc::markdown { + +// Allocates a contiguous array of T in the arena and returns an ArrayRef. +template <typename T> +static ArrayRef<T> allocateArray(SmallVectorImpl<T> &Vec, + BumpPtrAllocator &Arena) { + if (Vec.empty()) + return {}; + T *Allocated = Arena.Allocate<T>(Vec.size()); + std::uninitialized_copy(Vec.begin(), Vec.end(), Allocated); + return ArrayRef<T>(Allocated, Vec.size()); +} + +// A line is a table separator if it only contains |, -, :, and spaces, +// and has at least one -. +static bool isSepRow(StringRef Line) { + return Line.contains('-') && + Line.find_first_not_of("|-: ") == StringRef::npos; +} + +// Returns true if Line begins with a bullet list marker (-, *, or +) +// followed by a space. +static bool isListItem(StringRef Line) { + return Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ "); +} + +// Returns true if Line begins with an ordered list marker: one or more digits +// followed by a period and a space (e.g. "1. ", "42. "). 
+static bool isOrderedListItem(StringRef Line) { + size_t Dot = Line.find_first_not_of("0123456789"); + return Dot != StringRef::npos && Dot > 0 && Line[Dot] == '.' && + Dot + 1 < Line.size() && Line[Dot + 1] == ' '; +} + +// Returns true if Line is a thematic break: three or more matching -, *, or _ +// characters, optionally separated by spaces, with nothing else. Line is +// expected to be trimmed. +static bool isThematicBreak(StringRef Line) { + char Marker = Line.empty() ? '\0' : Line[0]; + if (Marker != '-' && Marker != '*' && Marker != '_') + return false; + unsigned Count = 0; + for (char C : Line) { + if (C == Marker) + ++Count; + else if (C != ' ') + return false; + } + return Count >= 3; +} + +// Returns true if Line is a block quote line: it starts with "> ", or is a bare +// ">" marking an empty quote line. +static bool isBlockQuote(StringRef Line) { + return Line.starts_with("> ") || Line == ">"; +} + +// Returns the ATX heading level (1 to 6) when Line is an ATX heading: one to +// six leading # characters followed by a space. Returns 0 otherwise, so seven +// or more # characters fall back to plain text. +static unsigned atxHeadingLevel(StringRef Line) { + size_t Level = Line.find_first_not_of('#'); + if (Level == StringRef::npos || Level < 1 || Level > 6 || Line[Level] != ' ') + return 0; + return Level; +} + +// A forward cursor over the lines of a paragraph. Lines are stored untrimmed; +// callers trim where they need a normalized view. +class LineReader { +public: + explicit LineReader(ArrayRef<StringRef> Lines) : Lines(Lines) {} + + // True once every line has been consumed. + bool atEnd() const { return Pos >= Lines.size(); } + + // The current line, untrimmed. Must not be called when atEnd(). + StringRef peek() const { + assert(!atEnd() && "peek past end of input"); + return Lines[Pos]; + } + + // The line Offset positions ahead of the cursor, or an empty StringRef when + // that position is past the end. peek(0) is the current line. 
+ StringRef peek(size_t Offset) const { + size_t Target = Pos + Offset; + return Target < Lines.size() ? Lines[Target] : StringRef(); + } + + // Consume the current line and return it, untrimmed. Must not be called when + // atEnd(). + StringRef advance() { + assert(!atEnd() && "advance past end of input"); + return Lines[Pos++]; + } + +private: + ArrayRef<StringRef> Lines; + size_t Pos = 0; +}; + +// A forward cursor over the characters of a string. position() and seek() let +// it interoperate with the index-based run and delimiter helpers below. +class CharReader { +public: + explicit CharReader(StringRef S) : S(S) {} + + // True once every character has been consumed. + bool atEnd() const { return Pos >= S.size(); } + + // The current character. Must not be called when atEnd(). + char peek() const { + assert(!atEnd() && "peek past end of input"); + return S[Pos]; + } + + // Consume the current character and return it. Must not be called when + // atEnd(). + char advance() { + assert(!atEnd() && "advance past end of input"); + return S[Pos++]; + } + + // The current scan position, for substring, run, and delimiter computations. + size_t position() const { return Pos; } + + // Move the cursor to an absolute position, used to skip past a matched span. + void seek(size_t NewPos) { Pos = NewPos; } + +private: + StringRef S; + size_t Pos = 0; +}; + +// Returns the number of consecutive copies of C starting at S[Start]. +static size_t countRun(StringRef S, size_t Start, char C) { + size_t I = Start; + while (I < S.size() && S[I] == C) + ++I; + return I - Start; ---------------- ilovepi wrote:
I'm pretty sure you can just use `find_first_not_of()` for this exact purpose. https://github.com/llvm/llvm-project/pull/202991 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
