[ https://issues.apache.org/jira/browse/ACCUMULO-4195?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15252369#comment-15252369 ]
ASF GitHub Bot commented on ACCUMULO-4195: ------------------------------------------ Github user ShawnWalker commented on a diff in the pull request: https://github.com/apache/accumulo/pull/95#discussion_r60627887 --- Diff: core/src/main/java/org/apache/accumulo/core/file/FileOperations.java --- @@ -48,38 +48,295 @@ public static FileOperations getInstance() { return new DispatchingFileFactory(); } + // + // Abstract methods (to be implemented by subclasses) + // + + protected abstract long getFileSize(GetFileSizeOperation options) throws IOException; + + protected abstract FileSKVWriter openWriter(OpenWriterOperation options) throws IOException; + + protected abstract FileSKVIterator openIndex(OpenIndexOperation options) throws IOException; + + protected abstract FileSKVIterator openScanReader(OpenScanReaderOperation options) throws IOException; + + protected abstract FileSKVIterator openReader(OpenReaderOperation options) throws IOException; + + // + // File operations + // + /** - * Open a reader that will not be seeked giving an initial seek location. This is useful for file operations that only need to scan data within a range and do - * not need to seek. Therefore file metadata such as indexes does not need to be kept in memory while the file is scanned. Also seek optimizations like bloom - * filters do not need to be loaded. + * Construct an operation object allowing one to query the size of a file. <br> + * Syntax: * + * <pre> + * long size = fileOperations.getFileSize().ofFile(filename, fileSystem, fsConfiguration).withTableConfiguration(tableConf).execute(); + * </pre> */ + public GetFileSizeOperation getFileSize() { + return new GetFileSizeOperation(); + } - public abstract FileSKVIterator openReader(String file, Range range, Set<ByteSequence> columnFamilies, boolean inclusive, FileSystem fs, Configuration conf, - RateLimiter readLimiter, AccumuloConfiguration tableConf) throws IOException; + /** + * Construct an operation object allowing one to create a writer for a file. <br> + * Syntax: + * + * <pre> + * FileSKVWriter writer = fileOperations.openWriter() + * .ofFile(...) + * .withTableConfiguration(...) + * .withRateLimiter(...) // optional + * .withCompression(...) // optional + * .execute(); + * </pre> + */ + public OpenWriterOperation openWriter() { + return new OpenWriterOperation(); + } + + /** + * Construct an operation object allowing one to create an index iterator for a file. <br> + * Syntax: + * + * <pre> + * FileSKVIterator iterator = fileOperations.openIndex() + * .ofFile(...) + * .withTableConfiguration(...) + * .withRateLimiter(...) // optional + * .withBlockCache(...) // optional + * .execute(); + * </pre> + */ + public OpenIndexOperation openIndex() { + return new OpenIndexOperation(); + } - public abstract FileSKVIterator openReader(String file, Range range, Set<ByteSequence> columnFamilies, boolean inclusive, FileSystem fs, Configuration conf, - RateLimiter readLimiter, AccumuloConfiguration tableConf, BlockCache dataCache, BlockCache indexCache) throws IOException; + /** + * Construct an operation object allowing one to create a "scan" reader for a file. Scan readers do not have any optimizations for seeking beyond their + * initial position. This is useful for file operations that only need to scan data within a range and do not need to seek. Therefore file metadata such as + * indexes does not need to be kept in memory while the file is scanned. Also seek optimizations like bloom filters do not need to be loaded. <br> + * Syntax: + * + * <pre> + * FileSKVIterator scanner = fileOperations.openScanReader() + * .ofFile(...) + * .overRange(...) + * .withTableConfiguration(...) + * .withRateLimiter(...) // optional + * .withBlockCache(...) // optional + * .execute(); + * </pre> + */ + public OpenScanReaderOperation openScanReader() { + return new OpenScanReaderOperation(); + } /** - * Open a reader that fully support seeking and also enable any optimizations related to seeking, like bloom filters. + * Construct an operation object allowing one to create a reader for a file. A reader constructed in this manner fully supports seeking, and also enables any + * optimizations related to seeking (e.g. Bloom filters). <br> + * Syntax: * + * <pre> + * FileSKVIterator scanner = fileOperations.openReader() + * .ofFile(...) + * .withTableConfiguration(...) + * .withRateLimiter(...) // optional + * .withBlockCache(...) // optional + * .seekToBeginning(...) // optional + * .execute(); + * </pre> + */ + public OpenReaderOperation openReader() { + return new OpenReaderOperation(); + } + + // + // Operation objects. + // + + /** + * Options common to all FileOperations. + */ + protected static class FileAccessOperation<SubclassType extends FileAccessOperation<SubclassType>> { + private AccumuloConfiguration tableConfiguration; + + private String filename; + private FileSystem fs; + private Configuration fsConf; + + /** Specify the table configuration defining access to this file. */ + @SuppressWarnings("unchecked") + public SubclassType withTableConfiguration(AccumuloConfiguration tableConfiguration) { + this.tableConfiguration = tableConfiguration; + return (SubclassType) this; + } + + /** Specify the file this operation should apply to. */ + @SuppressWarnings("unchecked") + public SubclassType ofFile(String filename, FileSystem fs, Configuration fsConf) { + this.filename = filename; + this.fs = fs; + this.fsConf = fsConf; + return (SubclassType) this; + } + + public String getFilename() { + return filename; + } + + public FileSystem getFileSystem() { + return fs; + } + + public Configuration getConfiguration() { + return fsConf; + } + + public AccumuloConfiguration getTableConfiguration() { --- End diff -- I didn't, but I'll consider it. There's a balance here. Keeping to a purely 'fluent' style, I should have a function call for each separate parameter, e.g. ```java openWriter() .ofFile(...) .onFileSystem(...) .withFileSystemConfiguration(conf) .withTableConfiguration(...) ... ``` I decided that 'file', and 'filesystem' were reasonable to lump together to cut down on verbosity. I'm not entirely sure where the `Configuration` should go, particularly as something like 50% of callers just use the equivalent of `fs.getConf()` for this, but I lumped it together with 'file' too. On the other hand, table configuration seems to me to really be a very different concept. Perhaps (?): ```java openWriter() .ofFile(filename, fs) .withConfiguration(conf, tableConf) ... ``` > Generalized configuration object for Accumulo rfile interaction > --------------------------------------------------------------- > > Key: ACCUMULO-4195 > URL: https://issues.apache.org/jira/browse/ACCUMULO-4195 > Project: Accumulo > Issue Type: Improvement > Reporter: Josh Elser > Assignee: Shawn Walker > Fix For: 1.8.0 > > > Taken from https://github.com/apache/accumulo/pull/90/files#r59489073 > On [~ShawnWalker]'s PR for ACCUMULO-4187 which adds rate-limiting on major > compactions, we noted that many of the changes were related to passing an > extra argument (RateLimiter) around through all of the code which is related > to file interaction. > It would be nice to move to a centralized configuration object instead of > having to add a new argument every time some new feature is added to the > file-path. -- This message was sent by Atlassian JIRA (v6.3.4#6332)