================ @@ -0,0 +1,657 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/DebugLog.h" +#include "llvm/Support/StringSaver.h" +#include <cassert> +#include <memory> +#include <string> + +#define DEBUG_TYPE "clang-doc" + +using namespace llvm; + +namespace clang::doc::markdown { + +// Allocates a contiguous array of T in the arena and returns an ArrayRef. +template <typename T> +static ArrayRef<T> allocateArray(SmallVectorImpl<T> &Vec, + BumpPtrAllocator &Arena) { + if (Vec.empty()) + return {}; + T *Allocated = Arena.Allocate<T>(Vec.size()); + std::uninitialized_copy(Vec.begin(), Vec.end(), Allocated); + return ArrayRef<T>(Allocated, Vec.size()); +} + +// A line is a table separator if it only contains |, -, :, and spaces, +// and has at least one -. +static bool isSepRow(StringRef Line) { + return Line.contains('-') && + Line.find_first_not_of("|-: ") == StringRef::npos; +} + +// Returns true if Line begins with a bullet list marker (-, *, or +) +// followed by a space. +static bool isListItem(StringRef Line) { + return Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ "); +} + +// Returns true if Line begins with an ordered list marker: one or more digits +// followed by a period and a space (e.g. "1. ", "42. "). 
+static bool isOrderedListItem(StringRef Line) { + size_t Dot = Line.find_first_not_of("0123456789"); + return Dot != StringRef::npos && Dot > 0 && Line[Dot] == '.' && + Dot + 1 < Line.size() && Line[Dot + 1] == ' '; +} + +// Returns true if Line is a thematic break: three or more matching -, *, or _ +// characters, optionally separated by spaces, with nothing else. Line is +// expected to be trimmed. +static bool isThematicBreak(StringRef Line) { + char Marker = Line.empty() ? '\0' : Line[0]; + if (Marker != '-' && Marker != '*' && Marker != '_') + return false; + // Only the marker and spaces may appear, with at least three markers. + const char Allowed[] = {Marker, ' '}; + return Line.find_first_not_of(StringRef(Allowed, 2)) == StringRef::npos && + Line.count(Marker) >= 3; +} + +// Returns true if Line is a block quote line: it starts with "> ", or is a bare +// ">" marking an empty quote line. +static bool isBlockQuote(StringRef Line) { + return Line.starts_with("> ") || Line == ">"; +} + +// Returns the ATX heading level (1 to 6) when Line is an ATX heading: one to +// six leading # characters followed by a space. Returns 0 otherwise, so seven +// or more # characters fall back to plain text. +static unsigned atxHeadingLevel(StringRef Line) { + size_t Level = Line.find_first_not_of('#'); + if (Level == StringRef::npos || Level < 1 || Level > 6 || Line[Level] != ' ') + return 0; + return Level; +} + +// A forward cursor over the lines of a paragraph. Lines are stored untrimmed; +// callers trim where they need a normalized view. +class LineReader { +public: + explicit LineReader(ArrayRef<StringRef> Lines) : Lines(Lines) {} + + // True once every line has been consumed. + bool atEnd() const { return Pos >= Lines.size(); } + + // The current line, untrimmed. Must not be called when atEnd(). 
+ StringRef peek() const { + assert(!atEnd() && "peek past end of input"); + return Lines[Pos]; + } + + // The line Offset positions ahead of the cursor, or an empty StringRef when + // that position is past the end. peek(0) is the current line. + StringRef peek(size_t Offset) const { + size_t Target = Pos + Offset; + return Target < Lines.size() ? Lines[Target] : StringRef(); + } + + // Consume the current line and return it, untrimmed. Must not be called when + // atEnd(). + StringRef advance() { + assert(!atEnd() && "advance past end of input"); + return Lines[Pos++]; + } + +private: + ArrayRef<StringRef> Lines; + size_t Pos = 0; +}; + +// A forward cursor over the characters of a string. position() and seek() let +// it interoperate with the index-based run and delimiter helpers below. +class CharReader { +public: ---------------- ilovepi wrote:
Thinking about this more... This is just a poor man's iterator. `peek()` gives the current value, which is `*it` in iterator terms. So I think all your algorithms can either be converted to take iterators or be made range-based. https://github.com/llvm/llvm-project/pull/202991 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
