github-actions[bot] commented on code in PR #17586:
URL: https://github.com/apache/doris/pull/17586#discussion_r1133097228
##########
be/src/io/fs/file_system.h:
##########
@@ -21,58 +21,116 @@
#include "common/status.h"
#include "gutil/macros.h"
-#include "io/fs/file_reader.h"
#include "io/fs/file_reader_options.h"
-#include "io/fs/file_writer.h"
#include "io/fs/path.h"
+#include "io/fs/file_reader_writer_fwd.h"
+#include "io/io_common.h"
namespace doris {
namespace io {
-class FileWriter;
-class FileReader;
-
enum class FileSystemType : uint8_t {
LOCAL,
S3,
HDFS,
BROKER,
};
+struct FileInfo {
+ // only file name, no path
+ std::string file_name;
+ size_t file_size;
+ bool is_file;
+};
+
class FileSystem : public std::enable_shared_from_this<FileSystem> {
public:
+ // The following are public interface.
+ // And derived classes should implement all xxx_impl methods.
+ Status create_file(const Path& file, FileWriterPtr* writer);
+ Status open_file(const Path& file, FileReaderSPtr* reader, IOContext*
io_ctx) {
+ return open_file(file, reader, io_ctx);
+ }
+ Status open_file(const Path& file, const FileReaderOptions& reader_options,
+ FileReaderSPtr* reader, IOContext* io_ctx);
+ Status create_directory(const Path& dir);
+ Status delete_file(const Path& file);
+ Status delete_directory(const Path& dir);
+ Status batch_delete(const std::vector<Path>& files);
+ Status exists(const Path& path, bool* res) const;
+ Status file_size(const Path& file, size_t* file_size) const;
+ Status list(const Path& dir, bool regular_file, std::vector<FileInfo>*
files);
+ Status rename(const Path& orig_name, const Path& new_name);
+ Status rename_dir(const Path& orig_name, const Path& new_name);
+
+ std::shared_ptr<FileSystem> getSPtr() { return shared_from_this(); }
+
+public:
+ // the root path of this fs.
+ // if not empty, all given Path will be "_root_path/path"
+ const Path& root_path() const { return _root_path; }
+ // a unique id of this fs.
+ // used for cache or re-use.
+ // can be empty if not used
+ const std::string& id() const { return _id; }
+ // file system type
+ FileSystemType type() const { return _type; }
+
virtual ~FileSystem() = default;
+ // Each derived class should implement create method to create fs.
DISALLOW_COPY_AND_ASSIGN(FileSystem);
- virtual Status create_file(const Path& path, FileWriterPtr* writer) = 0;
+protected:
+ /// create file and return a FileWriter
+ virtual Status create_file_impl(const Path& file, FileWriterPtr* writer) =
0;
- virtual Status open_file(const Path& path, const FileReaderOptions&
reader_options,
+ /// open file and return a FileReader
+ virtual Status open_file_impl(const Path& file, const FileReaderOptions&
reader_options,
FileReaderSPtr* reader, IOContext* io_ctx) = 0;
- virtual Status open_file(const Path& path, FileReaderSPtr* reader,
IOContext* io_ctx) = 0;
+ /// create directory recursively
+ virtual Status create_directory_impl(const Path& dir) = 0;
- virtual Status delete_file(const Path& path) = 0;
+ /// delete file.
+ /// return OK if file does not exist
+ /// return ERR if not a regular file
+ virtual Status delete_file_impl(const Path& file) = 0;
- // create directory recursively
- virtual Status create_directory(const Path& path) = 0;
+ /// delete all files in "files"
+ virtual Status batch_delete_impl(const std::vector<Path>& files) = 0;
- // remove all under directory recursively
- virtual Status delete_directory(const Path& path) = 0;
+ /// remove all under directory recursively
+ /// return OK if dir does not exist
+ /// return ERR if not a dir
+ virtual Status delete_directory_impl(const Path& dir) = 0;
- // hard link `src` to `dest`
- // FIXME(cyx): Should we move this method to LocalFileSystem?
- virtual Status link_file(const Path& src, const Path& dest) = 0;
+ /// check if path exist
+ /// return OK and res = 1 means exist, res = 0 means does not exist
+ /// return ERR otherwise
+ virtual Status exists_impl(const Path& path, bool* res) const = 0;
- virtual Status exists(const Path& path, bool* res) const = 0;
+ /// return OK and get size of given file, save in "file_size".
+ /// return ERR otherwise
+ virtual Status file_size_impl(const Path& file, size_t* file_size) const =
0;
- virtual Status file_size(const Path& path, size_t* file_size) const = 0;
+ /// return OK and list all objects in "dir", save in "files"
+ /// return ERR otherwise
+ /// will not traverse dir recursively.
+ virtual Status list_impl(const Path& dir, bool regular_file,
std::vector<FileInfo>* files) = 0;
- virtual Status list(const Path& path, std::vector<Path>* files) = 0;
+ /// rename file from orig_name to new_name
+ virtual Status rename_impl(const Path& orig_name, const Path& new_name) =
0;
- const Path& root_path() const { return _root_path; }
- const std::string& id() const { return _id; }
- FileSystemType type() const { return _type; }
+ /// rename dir from orig_name to new_name
+ virtual Status rename_dir_impl(const Path& orig_name, const Path&
new_name) = 0;
+
+ Path absolute_path(const Path& path) const {
+ if (path.is_absolute()) {
+ return path;
+ }
+ return _root_path / path;
+ }
protected:
Review Comment:
warning: redundant access specifier has the same accessibility as the
previous access specifier [readability-redundant-access-specifiers]
```suggestion
```
**be/src/io/fs/file_system.h:83:** previously declared here
```cpp
protected:
^
```
##########
be/src/runtime/snapshot_loader.cpp:
##########
@@ -628,4 +639,22 @@ Status SnapshotLoader::_report_every(int report_threshold,
int* counter, int32_t
return Status::OK();
}
+Status SnapshotLoader::_list_with_checksum(const std::string& dir,
std::map<std::string, FileStat>* md5_files) {
+ std::vector<io::FileInfo> files;
+ RETURN_IF_ERROR(_remote_fs->list(dir, true, &files));
+ for (auto& tmp_file : files) {
+ std::string& file = tmp_file.file_name;
+ size_t pos = file.find_last_of(".");
Review Comment:
warning: 'find_last_of' called with a string literal consisting of a single
character; consider using the more effective overload accepting a character
[performance-faster-string-find]
```suggestion
size_t pos = file.find_last_of('.');
```
##########
be/src/io/fs/remote_file_system.h:
##########
@@ -28,27 +28,50 @@ class RemoteFileSystem : public FileSystem {
: FileSystem(std::move(root_path), std::move(id), type) {}
~RemoteFileSystem() override = default;
- // `local_path` should be an absolute path on local filesystem.
- virtual Status upload(const Path& local_path, const Path& dest_path) = 0;
+ Status upload(const Path& local_file, const Path& dest_file);
+ Status batch_upload(const std::vector<Path>& local_files,
+ const std::vector<Path>& remote_files);
+ Status direct_upload(const Path& remote_file, const std::string& content);
+ Status upload_with_checksum(const Path& local_file, const Path& remote,
+ const std::string& checksum);
+ Status download(const Path& remote_file, const Path& local);
+ Status direct_download(const Path& remote_file, std::string* content);
- virtual Status batch_upload(const std::vector<Path>& local_paths,
- const std::vector<Path>& dest_paths) = 0;
+ Status connect();
- virtual Status batch_delete(const std::vector<Path>& paths) {
- return Status::NotSupported("batch_delete");
- }
+protected:
+ /// connect to remote file system
+ virtual Status connect_impl() = 0;
- virtual Status connect() = 0;
+ virtual Status open_file_impl(const Path& file, const FileReaderOptions&
reader_options,
Review Comment:
warning: 'virtual' is redundant since the function is already declared
'override' [modernize-use-override]
```suggestion
Status open_file_impl(const Path& file, const FileReaderOptions&
reader_options,
```
##########
be/src/io/fs/s3_file_system.cpp:
##########
@@ -51,6 +53,23 @@ namespace io {
}
#endif
+#ifndef CHECK_S3_PATH
+#define CHECK_S3_PATH(uri, path) \
Review Comment:
warning: macro is not used [clang-diagnostic-unused-macros]
```cpp
#define CHECK_S3_PATH(uri, path) \
^
```
##########
be/src/io/fs/file_system.h:
##########
@@ -21,58 +21,116 @@
#include "common/status.h"
#include "gutil/macros.h"
-#include "io/fs/file_reader.h"
#include "io/fs/file_reader_options.h"
-#include "io/fs/file_writer.h"
#include "io/fs/path.h"
+#include "io/fs/file_reader_writer_fwd.h"
+#include "io/io_common.h"
namespace doris {
namespace io {
-class FileWriter;
-class FileReader;
-
enum class FileSystemType : uint8_t {
LOCAL,
S3,
HDFS,
BROKER,
};
+struct FileInfo {
+ // only file name, no path
+ std::string file_name;
+ size_t file_size;
+ bool is_file;
+};
+
class FileSystem : public std::enable_shared_from_this<FileSystem> {
public:
+ // The following are public interface.
+ // And derived classes should implement all xxx_impl methods.
+ Status create_file(const Path& file, FileWriterPtr* writer);
+ Status open_file(const Path& file, FileReaderSPtr* reader, IOContext*
io_ctx) {
Review Comment:
warning: all paths through this function will call itself
[clang-diagnostic-infinite-recursion]
```cpp
Status open_file(const Path& file, FileReaderSPtr* reader, IOContext*
io_ctx) {
^
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]