nickva commented on a change in pull request #496: Couchdb 3287 pluggable 
storage engines
URL: https://github.com/apache/couchdb/pull/496#discussion_r118568765
 
 

 ##########
 File path: src/couch/src/couch_db_engine.erl
 ##########
 @@ -0,0 +1,877 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_db_engine).
+
+
+-include("couch_db.hrl").
+-include("couch_db_int.hrl").
+
+
+-type filepath() :: iolist().
+-type docid() :: binary().
+-type rev() :: {non_neg_integer(), binary()}.
+-type revs() :: [rev()].
+-type json() :: any().
+
+-type doc_pair() :: {
+        #full_doc_info{} | not_found,
+        #full_doc_info{} | not_found
+    }.
+
+-type doc_pairs() :: [doc_pair()].
+
+-type db_open_options() :: [
+        create
+    ].
+
+-type delete_options() :: [
+        {context, delete | compaction} |
+        sync
+    ].
+
+-type purge_info() :: [{docid(), revs()}].
+-type epochs() :: [{Node::atom(), UpdateSeq::non_neg_integer()}].
+-type size_info() :: [{Name::atom(), Size::non_neg_integer()}].
+
+-type write_stream_options() :: [
+        {buffer_size, Size::pos_integer()} |
+        {encoding, atom()} |
+        {compression_level, pos_integer()}
+    ].
+
+-type doc_fold_options() :: [
+        {start_key, Key::any()} |
+        {end_key, Key::any()} |
+        {end_key_gt, Key::any()} |
+        {dir, fwd | rev} |
+        include_reductions |
+        include_deleted
+    ].
+
+-type changes_fold_options() :: [
+        {dir, fwd | rev}
+    ].
+
+-type db_handle() :: any().
+
+-type doc_fold_fun() :: fun((#full_doc_info{}, UserAcc::any()) ->
+        {ok, NewUserAcc::any()} |
+        {stop, NewUserAcc::any()}).
+
+-type local_doc_fold_fun() :: fun((#doc{}, UserAcc::any()) ->
+        {ok, NewUserAcc::any()} |
+        {stop, NewUserAcc::any()}).
+
+-type changes_fold_fun() :: fun((#doc_info{}, UserAcc::any()) ->
+        {ok, NewUserAcc::any()} |
+        {stop, NewUserAcc::any()}).
+
+
+% This is called by couch_server to determine which
+% engine should be used for the given database. DbPath
+% is calculated based on the DbName and the configured
+% extension for a given engine. The first engine to
+% return true is the engine that will be used for the
+% database.
+-callback exists(DbPath::filepath()) -> boolean().
+
+
+% This is called by couch_server to delete a database. It
+% is called from inside the couch_server process which
+% means that the storage engine does not have to guarantee
+% its own consistency checks when executing in this
+% context. Although since this is executed in the context
+% of couch_server it should return relatively quickly.
+-callback delete(
+            RootDir::filepath(),
+            DbPath::filepath(),
+            DelOpts::delete_options()) ->
+        ok | {error, Reason::atom()} | any().
+
+
+% This function can be called from multiple contexts. It
+% will either be called just before a call to delete/3 above
+% or when a compaction is cancelled which executes in the
+% context of a couch_db_updater process. It is intended to
+% remove any temporary files used during compaction that
+% may be used to recover from a failed compaction swap.
+-callback delete_compaction_files(
+            RootDir::filepath(),
+            DbPath::filepath(),
+            DelOpts::delete_options()) ->
+        ok.
+
+
+% This is called from the couch_db_updater:init/1 context. As
+% such this means that it is guaranteed to only have one process
+% executing for a given DbPath argument (ie, opening a given
+% database is guaranteed to only happen in a single process).
+% However, multiple process may be trying to open different
+% databases concurrently so if a database requires a shared
+% resource that will require concurrency control at the storage
+% engine layer.
+%
+% The returned DbHandle should be a term that can be freely
+% copied between processes and accessed concurrently. However
+% its guaranteed that the handle will only ever be mutated
+% in a single threaded context (ie, within the couch_db_updater
+% process).
+-callback init(DbPath::filepath(), db_open_options()) ->
+    {ok, DbHandle::db_handle()}.
+
+
+% This is called in the context of couch_db_updater:terminate/2
+% and as such has the same properties for init/2. It's guaranteed
+% to be consistent for a given database but may be called by many
+% databases concurrently.
+-callback terminate(Reason::any(), DbHandle::db_handle()) -> Ignored::any().
+
+
+% This is called in the context of couch_db_updater:handle_call/3
+% for any message that is unknown. It can be used to handle messages
+% from asynchronous processes like the engine's compactor if it has one.
+-callback handle_call(Msg::any(), DbHandle::db_handle()) ->
+        {reply, Resp::any(), NewDbHandle::db_handle()} |
+        {stop, Reason::any(), Resp::any(), NewDbHandle::db_handle()}.
+
+
+% This is called in the context of couch_db_updater:handle_info/2
+% and has the same properties as handle_call/3.
+-callback handle_info(Msg::any(), DbHandle::db_handle()) ->
+    {noreply, NewDbHandle::db_handle()} |
+    {noreply, NewDbHandle::db_handle(), Timeout::timeout()} |
+    {stop, Reason::any(), NewDbHandle::db_handle()}.
+
+
+% These functions are called by any process opening or closing
+% a database. As such they need to be able to handle being
+% called concurrently. For example, the legacy engine uses these
+% to add monitors to the main engine process.
+-callback incref(DbHandle::db_handle()) -> {ok, NewDbHandle::db_handle()}.
+-callback decref(DbHandle::db_handle()) -> ok.
+-callback monitored_by(DbHande::db_handle()) -> [pid()].
+
+
+% All of the get_* functions may be called from many
+% processes concurrently.
+
+% The database should make a note of the update sequence when it
+% was last compacted. If the database doesn't need compacting it
+% can just hard code a return value of 0.
+-callback get_compacted_seq(DbHandle::db_handle()) ->
+            CompactedSeq::non_neg_integer().
+
+
+% The number of documents in the database which have all leaf
+% revisions marked as deleted.
+-callback get_del_doc_count(DbHandle::db_handle()) ->
+            DelDocCount::non_neg_integer().
+
+
+% This number is reported in the database info properties and
+% as such can be any JSON value.
+-callback get_disk_version(DbHandle::db_handle()) -> Version::json().
+
+
+% The number of documents in the database that have one or more
+% leaf revisions not marked as deleted.
+-callback get_doc_count(DbHandle::db_handle()) -> DocCount::non_neg_integer().
+
+
+% The epochs track which node owned the database starting at
+% a given update sequence. Each time a database is opened it
+% should look at the epochs. If the most recent entry is not
+% for the current node it should add an entry that will be
+% written the next time a write is performed. An entry is
+% simply a {node(), CurrentUpdateSeq} tuple.
+-callback get_epochs(DbHandle::db_handle()) -> Epochs::epochs().
+
+
+% Get the last purge request performed.
+-callback get_last_purged(DbHandle::db_handle()) -> LastPurged::purge_info().
+
+
+% Get the current purge sequence. This should be incremented
+% for every purge operation.
+-callback get_purge_seq(DbHandle::db_handle()) -> PurgeSeq::non_neg_integer().
+
+
+% Get the revision limit. This should just return the last
+% value that was passed to set_revs_limit/2.
+-callback get_revs_limit(DbHandle::db_handle()) -> RevsLimit::pos_integer().
+
+
+% Get the current security properties. This should just return
+% the last value that was passed to set_security/2.
+-callback get_security(DbHandle::db_handle()) -> SecProps::any().
+
+
+% This information is displayed in the database info poperties. It
+% should just be a list of {Name::atom(), Size::non_neg_integer()}
+% tuples that will then be combined across shards. Currently,
+% various modules expect there to at least be values for:
+%
+%   file     - Number of bytes on disk
+%
+%   active   - Theoretical minimum number of bytes to store this db on disk
+%              which is used to guide decisions on compaction
+%
+%   external - Number of bytes that would be required to represent the
+%              contents outside of the database (for capacity and backup
+%              planning)
+-callback get_size_info(DbHandle::db_handle()) -> SizeInfo::size_info().
+
+
+% The current update sequence of the database. The update
+% sequence should be incrememnted for every revision added to
+% the database.
+-callback get_update_seq(DbHandle::db_handle()) -> 
UpdateSeq::non_neg_integer().
+
+
+% Whenever a database is created it should generate a
+% persistent UUID for identification in case the shard should
+% ever need to be moved between nodes in a cluster.
+-callback get_uuid(DbHandle::db_handle()) -> UUID::binary().
+
+
+% These functions are only called by couch_db_updater and
+% as such are guaranteed to be single threaded calls. The
+% database should simply store these values somewhere so
+% they can be returned by the corresponding get_* calls.
+
 
 Review comment:
   Remove space between comment and function (for consistency with the rest of 
the module).
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to