From caa7c365e7ff91d787aea0bb697cf3dd6647fceb Mon Sep 17 00:00:00 2001 From: Shane Harvey Date: Wed, 26 Oct 2022 13:57:41 -0700 Subject: [PATCH] Improve save() performance by skipping index creation Indexes are now only created when a Model is first used, e.g. on the first call to save() or the first call to _get_collection(), or on every save() when the new meta["auto_create_index_on_save"] option is set to True. This is a minor breaking change for some applications. As a workaround apps can explicitly call ensure_indexes() or set meta["auto_create_index_on_save"] to True. --- benchmarks/test_basic_doc_ops.py | 2 +- benchmarks/test_inserts.py | 15 ++--- benchmarks/test_save_with_indexes.py | 87 ++++++++++++++++++++++++++++ docs/changelog.rst | 5 ++ docs/guide/defining-documents.rst | 8 ++- mongoengine/document.py | 10 +++- tests/document/test_indexes.py | 66 +++++++++++---------- 7 files changed, 151 insertions(+), 42 deletions(-) create mode 100644 benchmarks/test_save_with_indexes.py diff --git a/benchmarks/test_basic_doc_ops.py b/benchmarks/test_basic_doc_ops.py index 66b6a17f9..8b8bf4aaf 100644 --- a/benchmarks/test_basic_doc_ops.py +++ b/benchmarks/test_basic_doc_ops.py @@ -12,7 +12,7 @@ StringField, ) -mongoengine.connect(db="mongoengine_benchmark_test") +mongoengine.connect(db="mongoengine_benchmark_test", w=1) def timeit(f, n=10000): diff --git a/benchmarks/test_inserts.py b/benchmarks/test_inserts.py index dcd18ff88..8e8419933 100644 --- a/benchmarks/test_inserts.py +++ b/benchmarks/test_inserts.py @@ -5,15 +5,11 @@ def main(): setup = """ from pymongo import MongoClient -connection = MongoClient() +connection = MongoClient(w=1) connection.drop_database('mongoengine_benchmark_test') """ stmt = """ -from pymongo import MongoClient - -connection = MongoClient() - db = connection.mongoengine_benchmark_test noddy = db.noddy @@ -29,13 +25,12 @@ def main(): """ print("-" * 100) - print("PyMongo: Creating 10000 dictionaries.") + print('PyMongo: Creating 10000 dictionaries 
(write_concern={"w": 1}).') t = timeit.Timer(stmt=stmt, setup=setup) print(f"{t.timeit(1)}s") stmt = """ -from pymongo import MongoClient, WriteConcern -connection = MongoClient() +from pymongo import WriteConcern db = connection.mongoengine_benchmark_test noddy = db.noddy.with_options(write_concern=WriteConcern(w=0)) @@ -64,7 +59,7 @@ def main(): connection.close() from mongoengine import Document, DictField, connect -connect("mongoengine_benchmark_test") +connect("mongoengine_benchmark_test", w=1) class Noddy(Document): fields = DictField() @@ -82,7 +77,7 @@ class Noddy(Document): """ print("-" * 100) - print("MongoEngine: Creating 10000 dictionaries.") + print('MongoEngine: Creating 10000 dictionaries (write_concern={"w": 1}).') t = timeit.Timer(stmt=stmt, setup=setup) print(f"{t.timeit(1)}s") diff --git a/benchmarks/test_save_with_indexes.py b/benchmarks/test_save_with_indexes.py new file mode 100644 index 000000000..86e281cb3 --- /dev/null +++ b/benchmarks/test_save_with_indexes.py @@ -0,0 +1,87 @@ +import timeit + + +def main(): + setup = """ +from pymongo import MongoClient + +connection = MongoClient() +connection.drop_database("mongoengine_benchmark_test") +connection.close() + +from mongoengine import connect, Document, IntField, StringField +connect("mongoengine_benchmark_test", w=1) + +class User0(Document): + name = StringField() + age = IntField() + +class User1(Document): + name = StringField() + age = IntField() + meta = {"indexes": [["name"]]} + +class User2(Document): + name = StringField() + age = IntField() + meta = {"indexes": [["name", "age"]]} + +class User3(Document): + name = StringField() + age = IntField() + meta = {"indexes": [["name"]], "auto_create_index_on_save": True} + +class User4(Document): + name = StringField() + age = IntField() + meta = {"indexes": [["name", "age"]], "auto_create_index_on_save": True} +""" + + stmt = """ +for i in range(10000): + User0(name="Nunu", age=9).save() +""" + print("-" * 80) + print("Save 10000 
documents with 0 indexes.") + t = timeit.Timer(stmt=stmt, setup=setup) + print(f"{min(t.repeat(repeat=3, number=1))}s") + + stmt = """ +for i in range(10000): + User1(name="Nunu", age=9).save() +""" + print("-" * 80) + print("Save 10000 documents with 1 index.") + t = timeit.Timer(stmt=stmt, setup=setup) + print(f"{min(t.repeat(repeat=3, number=1))}s") + + stmt = """ +for i in range(10000): + User2(name="Nunu", age=9).save() +""" + print("-" * 80) + print("Save 10000 documents with 2 indexes.") + t = timeit.Timer(stmt=stmt, setup=setup) + print(f"{min(t.repeat(repeat=3, number=1))}s") + + stmt = """ +for i in range(10000): + User3(name="Nunu", age=9).save() +""" + print("-" * 80) + print("Save 10000 documents with 1 index (auto_create_index_on_save=True).") + t = timeit.Timer(stmt=stmt, setup=setup) + print(f"{min(t.repeat(repeat=3, number=1))}s") + + stmt = """ +for i in range(10000): + User4(name="Nunu", age=9).save() +""" + print("-" * 80) + print("Save 10000 documents with 2 indexes (auto_create_index_on_save=True).") + t = timeit.Timer(stmt=stmt, setup=setup) + print(f"{min(t.repeat(repeat=3, number=1))}s") + + +if __name__ == "__main__": + main() diff --git a/docs/changelog.rst b/docs/changelog.rst index 282934168..82772d9e3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,11 @@ Development - Support MONGODB-AWS authentication mechanism (with `authmechanismproperties`) #2507 - Turning off dereferencing for the results of distinct query. #2663 - Add tests against Mongo 5.0 in pipeline +- BREAKING CHANGE: Improved the performance of :meth:`~mongoengine.Document.save()` + by removing the call to :meth:`~mongoengine.Document.ensure_indexes` unless + ``meta['auto_create_index_on_save']`` is set to True. +- Added meta ``auto_create_index_on_save`` so you can enable index creation + on :meth:`~mongoengine.Document.save()`. 
Changes in 0.24.2 ================= diff --git a/docs/guide/defining-documents.rst b/docs/guide/defining-documents.rst index 53758672a..df749ee1e 100644 --- a/docs/guide/defining-documents.rst +++ b/docs/guide/defining-documents.rst @@ -574,6 +574,7 @@ There are a few top level defaults for all indexes that can be set:: 'index_background': True, 'index_cls': False, 'auto_create_index': True, + 'auto_create_index_on_save': False, } @@ -588,10 +589,15 @@ There are a few top level defaults for all indexes that can be set:: :attr:`auto_create_index` (Optional) When this is True (default), MongoEngine will ensure that the correct - indexes exist in MongoDB each time a command is run. This can be disabled + indexes exist in MongoDB when the Document is first used. This can be disabled in systems where indexes are managed separately. Disabling this will improve performance. +:attr:`auto_create_index_on_save` (Optional) + When this is True, MongoEngine will ensure that the correct + indexes exist in MongoDB each time :meth:`~mongoengine.document.Document.save` + is run. Enabling this will degrade performance. The default is False. This + option was added in version 0.25. Compound Indexes and Indexing sub documents ------------------------------------------- diff --git a/mongoengine/document.py b/mongoengine/document.py index e7a1938f2..b8b05402f 100644 --- a/mongoengine/document.py +++ b/mongoengine/document.py @@ -384,6 +384,10 @@ def save( meta['cascade'] = True. Also you can pass different kwargs to the cascade save using cascade_kwargs which overwrites the existing kwargs with custom values. + .. versionchanged:: 0.25 + save() no longer calls :meth:`~mongoengine.Document.ensure_indexes` + unless ``meta['auto_create_index_on_save']`` is set to True. 
+ """ signal_kwargs = signal_kwargs or {} @@ -407,8 +411,12 @@ def save( # it might be refreshed by the pre_save_post_validation hook, e.g., for etag generation doc = self.to_mongo() - if self._meta.get("auto_create_index", True): + if self._meta.get("auto_create_index_on_save", False): self.ensure_indexes() + else: + # Call _get_collection so that errors from ensure_indexes are not + # wrapped in OperationError, see test_primary_key_unique_not_working. + self._get_collection() try: # Save a new document or update an existing one diff --git a/tests/document/test_indexes.py b/tests/document/test_indexes.py index 4d56f8553..f367cd0f9 100644 --- a/tests/document/test_indexes.py +++ b/tests/document/test_indexes.py @@ -983,44 +983,52 @@ class Book(Document): def test_indexes_after_database_drop(self): """ - Test to ensure that indexes are re-created on a collection even - after the database has been dropped. + Test to ensure that indexes are not re-created on a collection + after the database has been dropped unless auto_create_index_on_save + is enabled. - Issue #812 + Issue #812 and #1446. """ # Use a new connection and database since dropping the database could # cause concurrent tests to fail. 
- connection = connect( - db="tempdatabase", alias="test_indexes_after_database_drop" - ) + tmp_alias = "test_indexes_after_database_drop" + connection = connect(db="tempdatabase", alias=tmp_alias) + self.addCleanup(connection.drop_database, "tempdatabase") class BlogPost(Document): - title = StringField() slug = StringField(unique=True) + meta = {"db_alias": tmp_alias} - meta = {"db_alias": "test_indexes_after_database_drop"} + BlogPost.drop_collection() + BlogPost(slug="test").save() + with pytest.raises(NotUniqueError): + BlogPost(slug="test").save() - try: - BlogPost.drop_collection() - - # Create Post #1 - post1 = BlogPost(title="test1", slug="test") - post1.save() - - # Drop the Database - connection.drop_database("tempdatabase") - - # Re-create Post #1 - post1 = BlogPost(title="test1", slug="test") - post1.save() - - # Create Post #2 - post2 = BlogPost(title="test2", slug="test") - with pytest.raises(NotUniqueError): - post2.save() - finally: - # Drop the temporary database at the end - connection.drop_database("tempdatabase") + # Drop the Database + connection.drop_database("tempdatabase") + BlogPost(slug="test").save() + # No error because the index was not recreated after dropping the database. + BlogPost(slug="test").save() + + # Repeat with auto_create_index_on_save: True. + class BlogPost2(Document): + slug = StringField(unique=True) + meta = { + "db_alias": tmp_alias, + "auto_create_index_on_save": True, + } + + BlogPost2.drop_collection() + BlogPost2(slug="test").save() + with pytest.raises(NotUniqueError): + BlogPost2(slug="test").save() + + # Drop the Database + connection.drop_database("tempdatabase") + BlogPost2(slug="test").save() + # Error because ensure_indexes is run on every save(). + with pytest.raises(NotUniqueError): + BlogPost2(slug="test").save() def test_index_dont_send_cls_option(self): """