On Thu, Mar 4, 2021 at 5:00 PM Mike Bayer <[email protected]> wrote:
> hey there-
>
> engine.begin() does not do anything to the SQLite connection at all as
> there is no begin() method in the Python DBAPI. this is why in your
> logging you will see a line that says "BEGIN (implicit)". nothing
> happened. the pysqlite driver controls the scope of the actual BEGIN on
> the sqlite3 library implicitly and there should be no difference at all in
> 2.0 vs. 1.x in how this occurs. There is also no difference between
> calling engine.begin() or connection.begin(), assuming "connection" here is
> the SQLAlchemy connection- engine.begin() simply calls connection.begin()
> after procuring the connection.
>
I agree that the `echo`ed trace is identical in both cases. But the
throughput and filesystem effects certainly look as if something is
triggering a checkpoint every time the connection is returned to the pool.
>
> note that pysqlite's implicit control of transactions is often
> insufficient for some scenarios where fine-grained control of transaction
> scope is desired, which is where the recipe at
> https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl
> comes from in order to circumvent pysqlite's assumptions.
>
Thanks for the heads-up.
>
> if you can please provide specifics that illustrate the difference in
> behavior as well as how you detect the WAL condition (as I am unfamiliar
> with this aspect of sqlite) that would be appreciated.
>
Here is a demo that illustrates what I'm seeing with the WAL. See the
commentary inline.
Thanks,
--
Jonathan Brandmeyer
--
SQLAlchemy -
The Python SQL Toolkit and Object Relational Mapper
http://www.sqlalchemy.org/
To post example code, please provide an MCVE: Minimal, Complete, and Verifiable
Example. See http://stackoverflow.com/help/mcve for a full description.
---
You received this message because you are subscribed to the Google Groups
"sqlalchemy" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/sqlalchemy/CA%2BXzfkqB_qr3Lgthda3ySkog9RgdbE8eB5z2Lhx3BcLxmLjiXw%40mail.gmail.com.
#!/usr/bin/env python3
# Copyright 2021 Space Sciences and Engineering, LLC.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import os
import sqlalchemy as sa
from sqlalchemy import create_engine
from sqlalchemy.event import listen
def sqlite_standard_options(dbapi_connection, connection_record):
    """Apply standard SQLite PRAGMA settings to a freshly opened connection.

    Intended for use as a SQLAlchemy ``connect`` event listener;
    ``connection_record`` is accepted to satisfy the listener signature but
    is unused.
    """
    # Pragmas applied, in order:
    #
    # * journal_mode=WAL -- enable the write-ahead log.  When the database is
    #   open read-write, sometime after the first transaction opens up, a pair
    #   of additional files appear in the filesystem named after the main
    #   database file, with suffixes -wal and -shm.
    #   https://sqlite.org/wal.html
    #   https://sqlite.org/pragma.html#pragma_journal_mode
    #
    # * synchronous=NORMAL -- the default (for backwards-compat reasons) is
    #   FULL.  When using the WAL, sync much less often.  The cost is a little
    #   more lost work in the face of a crash, but the database retains
    #   consistency.  It's also substantially faster.
    #   https://sqlite.org/pragma.html#pragma_synchronous
    #
    # * wal_autocheckpoint=1000 -- enforce the typical default for
    #   demonstration purposes; your sqlite may have been compiled with
    #   different options.
    #   https://sqlite.org/pragma.html#pragma_wal_autocheckpoint
    pragmas = (
        "PRAGMA journal_mode=WAL",
        "PRAGMA synchronous=NORMAL",
        "PRAGMA wal_autocheckpoint=1000",
    )
    cursor = dbapi_connection.cursor()
    for statement in pragmas:
        cursor.execute(statement)
    cursor.close()
# Shared schema: a single table of fixed-size random blobs, used purely to
# drive write traffic through the database during the benchmark.
metadata = sa.MetaData()
filler = sa.Table('filler', metadata,
                  # Monotonically increasing integer primary key, assigned by
                  # generate_rows() below.
                  sa.Column('filler_id', sa.Integer,
                            nullable=False, primary_key=True),
                  # Random payload; sized by _ROW_SIZE at insertion time.
                  sa.Column('filler', sa.LargeBinary, nullable=False))
def create_all(engine):
    """Install the per-connection PRAGMA listener and create the schema.

    The ``connect`` listener is registered *before* ``metadata.create_all``
    so that the very first DBAPI connection -- the one used to emit the DDL,
    which is then returned to the pool and reused -- already runs with
    journal_mode=WAL and the other standard pragmas.  With the registration
    after ``create_all``, that initial pooled connection never received the
    pragmas at all.

    Args:
        engine: a SQLAlchemy Engine bound to a SQLite database.
    """
    listen(engine, 'connect', sqlite_standard_options)
    metadata.create_all(engine)
# Bytes of blob payload per row; chosen small enough to fit in typical pages
# without triggering overflow pages.
_ROW_SIZE = 512


def generate_rows(n: int, start: int):
    """Yield ``n`` insertion dicts with primary keys ``start .. start+n-1``.

    Each dict maps the ``filler`` table's column names to values.  The blob
    payload comes from ``os.urandom`` rather than reading ``/dev/urandom``
    directly: it is portable (works on Windows, where ``/dev/urandom`` does
    not exist) and avoids holding an open file handle across the generator's
    lifetime.

    Args:
        n: number of rows to yield.
        start: primary key of the first row.
    """
    for pk in range(start, start + n):
        yield {
            "filler_id": pk,
            "filler": os.urandom(_ROW_SIZE),
        }
def main(args):
    """Populate a fresh SQLite database, demonstrating WAL checkpoint
    behavior under two different SQLAlchemy transaction-context styles.

    Args:
        args: parsed command-line namespace providing ``output_db``,
            ``engine_context``, ``total_size_mb``, and ``transaction_size``.

    Raises:
        ValueError: if ``args.output_db`` already exists; the benchmark
            assumes it is creating the database from scratch.
    """
    if os.path.isfile(args.output_db):
        raise ValueError("Test case does not support growing an existing db")
    engine = create_engine("sqlite:///" + args.output_db)
    create_all(engine)
    n_rows_per_txn = args.transaction_size // _ROW_SIZE
    # Pure integer arithmetic: int(a / b) detours through float division and
    # can misround for very large sizes, whereas // on ints is exact.
    n_transactions = args.total_size_mb * 1024 * 1024 // args.transaction_size
    if args.engine_context:
        # Runtime: 15.7 seconds on my workstation.
        for txn_id in range(n_transactions):
            starting_id = n_rows_per_txn * txn_id
            work = list(generate_rows(n_rows_per_txn, starting_id))
            with engine.begin() as transaction:
                transaction.execute(filler.insert(), work)
            # Each transaction appears to be completely checkpointing the
            # -wal and -shm files.  -wal file size is not observed to
            # be greater than the size of a single transaction.  It is
            # frequently observed to be zero during the course of the
            # benchmark.
    else:
        # Runtime 3.1 seconds on my workstation.
        with engine.connect() as connection:
            for txn_id in range(n_transactions):
                starting_id = n_rows_per_txn * txn_id
                work = list(generate_rows(n_rows_per_txn, starting_id))
                with connection.begin() as transaction:
                    connection.execute(filler.insert(), work)
                # Once each transaction is complete, the WAL and SHM files
                # persist on disk.  The files grow to about 1k pages (4 MB).
                # Modest checkpoints keep the file about that size.
            # Once the connection is fully terminated, sqlite performs a final
            # complete checkpoint and deletes the WAL file.
    # Final condition: The -wal and -shm files are both deleted.
    # If the program crashes partway through, and the -wal and -shm files are
    # either both intact, or both deleted, then the database is still
    # non-corrupted.
def build_parser():
    """Construct the command-line parser for the benchmark script."""
    parser = argparse.ArgumentParser(
        description="Benchmark transaction methods in sqlalchemy with the SQLite WAL")
    parser.add_argument('--output_db', '-o', help="Archive file name")
    parser.add_argument('--engine_context', action="store_true",
                        help="Use the engine for transaction context")
    # Default unit of total ingestion: 100 MB.  Large enough to peek at the
    # filesystem state while the test is running, but not so large that it's
    # a bomb.  You may need to make this somewhat larger to clearly observe
    # the filesystem state when not in --engine_context mode.  Index pages
    # make this only an approximation.
    parser.add_argument('--total_size_mb', default=100, type=int,
                        help="Approximate total db size to create, in megabytes")
    # The default WAL auto-checkpoint size is 1000 pages and the default page
    # size is 4kb.  Choose a default transaction size that is small enough to
    # clearly distinguish good WAL behavior from poor WAL behavior.
    parser.add_argument('--transaction_size', default=100000, type=int,
                        help="Unit of work per transaction, in bytes")
    return parser


if __name__ == "__main__":
    main(build_parser().parse_args())