bkietz commented on a change in pull request #7120:
URL: https://github.com/apache/arrow/pull/7120#discussion_r422259138



##########
File path: cpp/src/arrow/util/value_parsing.cc
##########
@@ -43,46 +44,43 @@ struct StringToFloatConverter::Impl {
   util::double_conversion::StringToDoubleConverter fallback_converter_;
 };
 
-constexpr int StringToFloatConverter::Impl::flags_;
-constexpr double StringToFloatConverter::Impl::main_junk_value_;
-constexpr double StringToFloatConverter::Impl::fallback_junk_value_;
+static StringToFloatConverterImpl g_string_to_float;

Review comment:
       To make immutability/thread safety clearer:
   ```suggestion
   static const StringToFloatConverterImpl g_string_to_float;
   ```

##########
File path: cpp/src/arrow/util/value_parsing.h
##########
@@ -55,26 +55,18 @@ class ARROW_EXPORT TimestampParser {
 
 namespace internal {
 
-/// \brief A class providing conversion from strings to some Arrow data types
-///
-/// Conversion is triggered by calling operator().  It returns true on
-/// success, false on failure.
-///
-/// The class may have a non-trivial construction cost in some cases,
-/// so it's recommended to use a single instance many times, if doing bulk
-/// conversion. Instances of this class are not guaranteed to be thread-safe.
-///
-template <typename ARROW_TYPE, typename Enable = void>
-class StringConverter;
+namespace detail {
 
-template <>
-class StringConverter<BooleanType> {
- public:
-  explicit StringConverter(const std::shared_ptr<DataType>& = NULLPTR) {}
+template <typename ARROW_TYPE>
+struct StringConverter {
+  static bool Convert(const char*, size_t, void*) { return false; }

Review comment:
       This allows construction of an always-fails StringConverter for any 
arrow type, replacing the previous behavior under which `StringConverter<T>` 
was a compile error if conversion for `T` is not defined. This replacement 
seems extremely dubious to me, especially if not clearly documented. Please 
revert this and any instantiations of `StringConverter<T>` or `ParseValue<T>` 
which are undefined.

##########
File path: cpp/src/arrow/util/value_parsing.h
##########
@@ -484,6 +455,14 @@ static inline bool ParseHH_MM_SS(const char* s, 
std::chrono::duration<ts_type>*
 
 }  // namespace detail
 
+/// \brief Attempt to convert a string to the primitive type corresponding to
+/// an Arrow data type
+template <typename T, typename ParseContext = void>
+inline bool ParseValue(const char* s, size_t length, typename T::c_type* out,
+                       const ParseContext* ctx = NULLPTR) {
+  return detail::StringConverter<T>::Convert(s, length, out);
+}
+

Review comment:
       This declaration (and the specialization below) make the requirements on 
ParseContext extremely unclear. It's optional and may be of any type... except 
when parsing timestamps. Instead it seems better to make ParseContext required 
for all types and empty for the majority: 
   ```suggestion
   template <typename T>
   struct ParseContext {};
   
   template <>
   struct ParseContext<TimestampType> {
     TimeUnit::type unit;
   };
   
   /// \brief Attempt to convert a string to the primitive type corresponding to
   /// an Arrow data type
   template <typename T, typename Converter = detail::StringConverter<T>>
   inline bool ParseValue(const ParseContext<T>& ctx, util::string_view s, 
typename T::c_type* out) {
     return Converter{}::Convert(s.data(), s.size(), out);
   }
   
   template <>
   inline bool ParseValue<TimestampType>(const ParseContext<TimestampType>& ctx,
       util::string_view s, typename T::c_type* out) {
     return ParseTimestampISO8601(s.data(), s.size(), ctx.unit, out);
   }
   ```
   
   This makes the usage of a parse context clearer and does not add 
significantly to boilerplate at any call site:
   ```c++
   ParseValue<Int8Type>({}, "42", &value)
   ParseValue<TimestampType>({TimeUnit::NANO}, ts_str, &ts_value)
   ```
   
   Bonus: the out argument is the last argument again.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to