traeak commented on code in PR #11014: URL: https://github.com/apache/trafficserver/pull/11014#discussion_r1504723547
########## src/tsutil/Regex.cc: ########## @@ -26,160 +26,281 @@ #include <array> #include <assert.h> -#if __has_include(<pcre/pcre.h>) -#include <pcre/pcre.h> -#else -#include <pcre.h> -#endif - +//---------------------------------------------------------------------------- namespace { -inline pcre * -as_pcre(void *p) +void * +my_malloc(size_t size, void * /*caller*/) { - return static_cast<pcre *>(p); + void *ptr = malloc(size); + return ptr; } -inline pcre_extra * -as_extra(void *p) + +void +my_free(void *ptr, void * /*caller*/) { - return static_cast<pcre_extra *>(p); + free(ptr); } } // namespace -#ifdef PCRE_CONFIG_JIT -/* -Using two thread locals avoids the deadlock because without the thread local object access, get_jit_stack doesn't call -the TLS init function which ends up calling __cxx_thread_atexit(which locks the dl_whatever mutex). Since the raw -pointer doesn't have a destructor to call, it doesn't need to call this. Interestingly, get_jit_stack was calling the -TLS init function to setup the destructor call at thread exit whether or not the class was declared in the function -body. -*/ -namespace +//---------------------------------------------------------------------------- +class RegexContext { -thread_local pcre_jit_stack *jit_stack; - -struct JitStackCleanup { - ~JitStackCleanup() +public: + static RegexContext * + get_instance() + { + if (!_regex_context) { + _regex_context = new RegexContext(); + } + return _regex_context; + } + ~RegexContext() { - if (jit_stack) { - pcre_jit_stack_free(jit_stack); + if (_general_context) { + pcre2_general_context_free(_general_context); + } + if (_compile_context) { + pcre2_compile_context_free(_compile_context); + } + if (_match_context) { + pcre2_match_context_free(_match_context); + } + if (_jit_stack) { + pcre2_jit_stack_free(_jit_stack); } } + pcre2_general_context * + get_general_context() + { + return _general_context; + } + pcre2_compile_context * + get_compile_context() + { + return _compile_context; + } + pcre2_match_context * + get_match_context() + { + return _match_context; + } + +private: + RegexContext() + { + _general_context = pcre2_general_context_create(my_malloc, my_free, nullptr); + _compile_context = pcre2_compile_context_create(_general_context); + _match_context = pcre2_match_context_create(_general_context); + _jit_stack = pcre2_jit_stack_create(4096, 1024 * 1024, nullptr); // 1 page min and 1MB max + pcre2_jit_stack_assign(_match_context, nullptr, _jit_stack); + } + pcre2_general_context *_general_context = nullptr; + pcre2_compile_context *_compile_context = nullptr; + pcre2_match_context *_match_context = nullptr; + pcre2_jit_stack *_jit_stack = nullptr; + thread_local static RegexContext *_regex_context; }; -thread_local JitStackCleanup jsc; +thread_local RegexContext *RegexContext::_regex_context = nullptr; -pcre_jit_stack * -get_jit_stack(void *) +//---------------------------------------------------------------------------- +namespace { - if (!jit_stack) { - jit_stack = pcre_jit_stack_alloc(4096, 1024 * 1024); // 1 page min and 1MB max - } - return jit_stack; -} - -} // end anonymous namespace -#endif // def PCRE_CONFIG_JIT +struct RegexContextCleanup { + ~RegexContextCleanup() { delete RegexContext::get_instance(); } +}; +thread_local RegexContextCleanup cleanup; +} // namespace -Regex::Regex(Regex &&that) noexcept : regex(that.regex), regex_extra(that.regex_extra) +//---------------------------------------------------------------------------- +RegexMatches::RegexMatches(uint32_t size) { - that.regex = nullptr; - that.regex_extra = nullptr; + pcre2_general_context *ctx = pcre2_general_context_create( + &RegexMatches::malloc, [](void *, void *) -> void {}, static_cast<void *>(this)); + + _match_data = pcre2_match_data_create(size, ctx); } -bool -Regex::compile(const char *pattern, const unsigned flags) +//---------------------------------------------------------------------------- +void * +RegexMatches::malloc(size_t size, void *caller) { - const char *error = nullptr; - int erroffset = 0; - int options = 0; - int study_opts = 0; + auto *matches = static_cast<RegexMatches *>(caller); - if (regex) { - return false; + // allocate from the buffer if possible + if (size <= sizeof(matches->_buffer) - matches->_buffer_bytes_used) { + void *ptr = matches->_buffer + matches->_buffer_bytes_used; + matches->_buffer_bytes_used += size; + return ptr; } - if (flags & RE_CASE_INSENSITIVE) { - options |= PCRE_CASELESS; - } + // otherwise use system malloc if the buffer is too small + void *ptr = ::malloc(size); + return ptr; +} - if (flags & RE_ANCHORED) { - options |= PCRE_ANCHORED; +//---------------------------------------------------------------------------- +RegexMatches::~RegexMatches() +{ + if (_match_data) { + pcre2_match_data_free(_match_data); } +} - regex = pcre_compile(pattern, options, &error, &erroffset, nullptr); - if (error) { - regex = nullptr; - return false; - } +//---------------------------------------------------------------------------- +size_t * +RegexMatches::get_ovector_pointer() +{ + return pcre2_get_ovector_pointer(_match_data); +} + +//---------------------------------------------------------------------------- +int32_t +RegexMatches::size() const +{ + return _size; +} + +//---------------------------------------------------------------------------- +pcre2_match_data * +RegexMatches::get_match_data() +{ + return _match_data; +} -#ifdef PCRE_CONFIG_JIT - study_opts |= PCRE_STUDY_JIT_COMPILE; -#endif +//---------------------------------------------------------------------------- +void +RegexMatches::set_size(int32_t size) +{ + _size = size; +} - regex_extra = pcre_study(as_pcre(regex), study_opts, &error); +//---------------------------------------------------------------------------- +void +RegexMatches::set_subject(std::string_view subject) +{ + _subject = subject; +} -#ifdef PCRE_CONFIG_JIT - if (regex_extra) { - pcre_assign_jit_stack(as_extra(regex_extra), &get_jit_stack, nullptr); +//---------------------------------------------------------------------------- +std::string_view +RegexMatches::operator[](size_t index) const +{ + // check if the index is valid + if (index >= pcre2_get_ovector_count(_match_data)) { + return std::string_view(); } -#endif - return true; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(_match_data); + return std::string_view(_subject.data() + ovector[2 * index], ovector[2 * index + 1] - ovector[2 * index]); } -int -Regex::get_capture_count() +//---------------------------------------------------------------------------- +Regex::Regex(Regex &&that) noexcept { - int captures = -1; - if (pcre_fullinfo(as_pcre(regex), as_extra(regex_extra), PCRE_INFO_CAPTURECOUNT, &captures) != 0) { - return -1; - } + _code = that._code; + that._code = nullptr; +} - return captures; +//---------------------------------------------------------------------------- +Regex::~Regex() +{ + if (_code) { Review Comment: pld check against nullptr ########## src/tsutil/Regex.cc: ########## @@ -26,160 +26,281 @@ #include <array> #include <assert.h> -#if __has_include(<pcre/pcre.h>) -#include <pcre/pcre.h> -#else -#include <pcre.h> -#endif - +//---------------------------------------------------------------------------- namespace { -inline pcre * -as_pcre(void *p) +void * +my_malloc(size_t size, void * /*caller*/) { - return static_cast<pcre *>(p); + void *ptr = malloc(size); + return ptr; } -inline pcre_extra * -as_extra(void *p) + +void +my_free(void *ptr, void * /*caller*/) { - return static_cast<pcre_extra *>(p); + free(ptr); } } // namespace -#ifdef PCRE_CONFIG_JIT -/* -Using two thread locals avoids the deadlock because without the thread local object access, get_jit_stack doesn't call -the TLS init function which ends up calling __cxx_thread_atexit(which locks the dl_whatever mutex). Since the raw -pointer doesn't have a destructor to call, it doesn't need to call this. Interestingly, get_jit_stack was calling the -TLS init function to setup the destructor call at thread exit whether or not the class was declared in the function -body. -*/ -namespace +//---------------------------------------------------------------------------- +class RegexContext { -thread_local pcre_jit_stack *jit_stack; - -struct JitStackCleanup { - ~JitStackCleanup() +public: + static RegexContext * + get_instance() + { + if (!_regex_context) { + _regex_context = new RegexContext(); + } + return _regex_context; + } + ~RegexContext() { - if (jit_stack) { - pcre_jit_stack_free(jit_stack); + if (_general_context) { + pcre2_general_context_free(_general_context); + } + if (_compile_context) { + pcre2_compile_context_free(_compile_context); + } + if (_match_context) { + pcre2_match_context_free(_match_context); + } + if (_jit_stack) { + pcre2_jit_stack_free(_jit_stack); } } + pcre2_general_context * + get_general_context() + { + return _general_context; + } + pcre2_compile_context * + get_compile_context() + { + return _compile_context; + } + pcre2_match_context * + get_match_context() + { + return _match_context; + } + +private: + RegexContext() + { + _general_context = pcre2_general_context_create(my_malloc, my_free, nullptr); + _compile_context = pcre2_compile_context_create(_general_context); + _match_context = pcre2_match_context_create(_general_context); + _jit_stack = pcre2_jit_stack_create(4096, 1024 * 1024, nullptr); // 1 page min and 1MB max + pcre2_jit_stack_assign(_match_context, nullptr, _jit_stack); + } + pcre2_general_context *_general_context = nullptr; + pcre2_compile_context *_compile_context = nullptr; + pcre2_match_context *_match_context = nullptr; + pcre2_jit_stack *_jit_stack = nullptr; + thread_local static RegexContext *_regex_context; }; -thread_local JitStackCleanup jsc; +thread_local RegexContext *RegexContext::_regex_context = nullptr; -pcre_jit_stack * -get_jit_stack(void *) +//---------------------------------------------------------------------------- +namespace { - if (!jit_stack) { - jit_stack = pcre_jit_stack_alloc(4096, 1024 * 1024); // 1 page min and 1MB max - } - return jit_stack; -} - -} // end anonymous namespace -#endif // def PCRE_CONFIG_JIT +struct RegexContextCleanup { + ~RegexContextCleanup() { delete RegexContext::get_instance(); } +}; +thread_local RegexContextCleanup cleanup; +} // namespace -Regex::Regex(Regex &&that) noexcept : regex(that.regex), regex_extra(that.regex_extra) +//---------------------------------------------------------------------------- +RegexMatches::RegexMatches(uint32_t size) { - that.regex = nullptr; - that.regex_extra = nullptr; + pcre2_general_context *ctx = pcre2_general_context_create( + &RegexMatches::malloc, [](void *, void *) -> void {}, static_cast<void *>(this)); + + _match_data = pcre2_match_data_create(size, ctx); } -bool -Regex::compile(const char *pattern, const unsigned flags) +//---------------------------------------------------------------------------- +void * +RegexMatches::malloc(size_t size, void *caller) { - const char *error = nullptr; - int erroffset = 0; - int options = 0; - int study_opts = 0; + auto *matches = static_cast<RegexMatches *>(caller); - if (regex) { - return false; + // allocate from the buffer if possible + if (size <= sizeof(matches->_buffer) - matches->_buffer_bytes_used) { + void *ptr = matches->_buffer + matches->_buffer_bytes_used; + matches->_buffer_bytes_used += size; + return ptr; } - if (flags & RE_CASE_INSENSITIVE) { - options |= PCRE_CASELESS; - } + // otherwise use system malloc if the buffer is too small + void *ptr = ::malloc(size); + return ptr; +} - if (flags & RE_ANCHORED) { - options |= PCRE_ANCHORED; +//---------------------------------------------------------------------------- +RegexMatches::~RegexMatches() +{ + if (_match_data) { + pcre2_match_data_free(_match_data); } +} - regex = pcre_compile(pattern, options, &error, &erroffset, nullptr); - if (error) { - regex = nullptr; - return false; - } +//---------------------------------------------------------------------------- +size_t * +RegexMatches::get_ovector_pointer() +{ + return pcre2_get_ovector_pointer(_match_data); +} + +//---------------------------------------------------------------------------- +int32_t +RegexMatches::size() const +{ + return _size; +} + +//---------------------------------------------------------------------------- +pcre2_match_data * +RegexMatches::get_match_data() +{ + return _match_data; +} -#ifdef PCRE_CONFIG_JIT - study_opts |= PCRE_STUDY_JIT_COMPILE; -#endif +//---------------------------------------------------------------------------- +void +RegexMatches::set_size(int32_t size) +{ + _size = size; +} - regex_extra = pcre_study(as_pcre(regex), study_opts, &error); +//---------------------------------------------------------------------------- +void +RegexMatches::set_subject(std::string_view subject) +{ + _subject = subject; +} -#ifdef PCRE_CONFIG_JIT - if (regex_extra) { - pcre_assign_jit_stack(as_extra(regex_extra), &get_jit_stack, nullptr); +//---------------------------------------------------------------------------- +std::string_view +RegexMatches::operator[](size_t index) const +{ + // check if the index is valid + if (index >= pcre2_get_ovector_count(_match_data)) { + return std::string_view(); } -#endif - return true; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(_match_data); + return std::string_view(_subject.data() + ovector[2 * index], ovector[2 * index + 1] - ovector[2 * index]); } -int -Regex::get_capture_count() +//---------------------------------------------------------------------------- +Regex::Regex(Regex &&that) noexcept { - int captures = -1; - if (pcre_fullinfo(as_pcre(regex), as_extra(regex_extra), PCRE_INFO_CAPTURECOUNT, &captures) != 0) { - return -1; - } + _code = that._code; + that._code = nullptr; +} - return captures; +//---------------------------------------------------------------------------- +Regex::~Regex() +{ + if (_code) { Review Comment: pls check against nullptr -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@trafficserver.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org