From 598d233024d0f54791daaee03e2da7dbe7f3898e Mon Sep 17 00:00:00 2001
From: "Carsten Wolff (cawo)"
Date: Mon, 24 Nov 2025 12:30:06 +0000
Subject: [PATCH 1/7] [ADD] pg.query_ids: query large numbers of ids memory-safely

This is mainly the code that has been recently added to `orm.recompute_fields`;
here we're making it re-usable.
---
 src/base/tests/test_util.py | 26 ++++++++++++
 src/util/pg.py              | 80 +++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/src/base/tests/test_util.py b/src/base/tests/test_util.py
index 325368fe1..fa9f924d8 100644
--- a/src/base/tests/test_util.py
+++ b/src/base/tests/test_util.py
@@ -18,6 +18,11 @@
 from odoo import modules
 from odoo.tools import mute_logger
 
+try:
+    from odoo.sql_db import db_connect
+except ImportError:
+    from openerp.sql_db import db_connect
+
 from odoo.addons.base.maintenance.migrations import util
 from odoo.addons.base.maintenance.migrations.testing import UnitTestCase, parametrize
 from odoo.addons.base.maintenance.migrations.util import snippets
@@ -1494,6 +1499,27 @@ def test_iter(self):
         self.assertEqual(result, expected)
 
 
+class TestQueryIds(UnitTestCase):
+    def test_straight(self):
+        result = list(util.query_ids(self.env.cr, "SELECT * FROM (VALUES (1), (2)) AS x(x)", itersize=2))
+        self.assertEqual(result, [1, 2])
+
+    def test_chunks(self):
+        with util.query_ids(self.env.cr, "SELECT * FROM (VALUES (1), (2)) AS x(x)") as ids:
+            result = list(util.chunks(ids, 100, fmt=list))
+        self.assertEqual(result, [[1, 2]])
+
+    def test_destructor(self):
+        ids = util.query_ids(self.env.cr, "SELECT id from res_users")
+        del ids
+
+    def test_pk_violation(self):
+        with db_connect(self.env.cr.dbname).cursor() as cr, mute_logger("odoo.sql_db"), self.assertRaises(
+            ValueError
+        ), util.query_ids(cr, "SELECT * FROM (VALUES (1), (1)) AS x(x)") as ids:
+            list(ids)
+
+
 class TestRecords(UnitTestCase):
     def test_rename_xmlid(self):
         cr = self.env.cr
diff --git a/src/util/pg.py b/src/util/pg.py
index 0ab92a9c2..ec2d47e27 100644
--- a/src/util/pg.py
+++ b/src/util/pg.py
@@ -1932,3 +1932,83 @@ def bulk_update_table(cr, table, columns, mapping, key_col="id"):
         key_col=key_col,
     )
     cr.execute(query, [Json(mapping)])
+
+
+class query_ids(object):
+    """
+    Iterator over ids returned by a query.
+
+    This allows iteration over a potentially huge number of ids without exhausting memory.
+
+    :param str query: the query that returns the ids. It can be DML, e.g. `UPDATE table WHERE ... RETURNING id`.
+    :param int itersize: determines the number of rows fetched from PG at once, see :func:`~odoo.upgrade.util.pg.named_cursor`.
+ """ + + def __init__(self, cr, query, itersize=None): + self._ncr = None + self._cr = cr + self._tmp_tbl = "_upgrade_query_ids_{}".format(uuid.uuid4().hex) + cr.execute( + format_query( + cr, + "CREATE UNLOGGED TABLE {}(id) AS (WITH query AS ({}) SELECT * FROM query)", + self._tmp_tbl, + SQLStr(query), + ) + ) + self._len = cr.rowcount + try: + cr.execute( + format_query( + cr, + "ALTER TABLE {} ADD CONSTRAINT {} PRIMARY KEY (id)", + self._tmp_tbl, + "pk_{}_id".format(self._tmp_tbl), + ) + ) + except psycopg2.IntegrityError as e: + if e.pgcode == errorcodes.UNIQUE_VIOLATION: + raise ValueError("The query for ids is producing duplicate values:\n{}".format(query)) + raise + self._ncr = named_cursor(cr, itersize) + self._ncr.execute(format_query(cr, "SELECT id FROM {} ORDER BY id", self._tmp_tbl)) + self._it = iter(self._ncr) + + def _close(self): + if self._ncr: + if self._ncr.closed: + return + self._ncr.close() + try: + self._cr.execute(format_query(self._cr, "DROP TABLE IF EXISTS {}", self._tmp_tbl)) + except psycopg2.InternalError as e: + if e.pgcode != errorcodes.IN_FAILED_SQL_TRANSACTION: + raise + + def __len__(self): + return self._len + + def __iter__(self): + return self + + def __next__(self): + if self._ncr.closed: + raise StopIteration + try: + return next(self._it)[0] + except StopIteration: + self._close() + raise + + def next(self): + return self.__next__() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._close() + return False + + def __del__(self): + self._close() From 6caef94ebf21a2bd608527263af4485bc3e64237 Mon Sep 17 00:00:00 2001 From: "Carsten Wolff (cawo)" Date: Mon, 24 Nov 2025 13:34:07 +0000 Subject: [PATCH 2/7] [IMP] orm.recompute_fields: use new pg.query_ids This code in recompute_fields has been made re-usable in a new util pg.query_ids. Use that. 
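
A minimal usage sketch of the new helper for other callers (the query, table
and chunk sizes below are illustrative only):

```python
# The ids are materialized once into an unlogged table and then streamed through
# a named cursor, so the full id list never has to fit into Python memory.
# len(ids) is still known up-front, which makes it easy to log progress.
query = "SELECT id FROM ir_attachment WHERE res_model = 'mail.message'"  # illustrative
with util.query_ids(cr, query, itersize=2**20) as ids:
    for chunk in util.chunks(ids, 10000, fmt=tuple):
        cr.execute("DELETE FROM ir_attachment WHERE id IN %s", [chunk])
```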
--- src/util/orm.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/util/orm.py b/src/util/orm.py index 228c572ca..586911d21 100644 --- a/src/util/orm.py +++ b/src/util/orm.py @@ -42,7 +42,7 @@ from .exceptions import MigrationError from .helpers import table_of_model from .misc import chunks, log_progress, version_between, version_gte -from .pg import SQLStr, column_exists, format_query, get_columns, named_cursor +from .pg import SQLStr, column_exists, format_query, get_columns, query_ids # python3 shims try: @@ -288,27 +288,16 @@ def recompute_fields(cr, model, fields, ids=None, logger=_logger, chunk_size=256 Model = env(cr)[model] if isinstance(model, basestring) else model model = Model._name - if ids is None: - query = format_query(cr, "SELECT id FROM {}", table_of_model(cr, model)) if query is None else SQLStr(query) - cr.execute( - format_query(cr, "CREATE UNLOGGED TABLE _upgrade_rf(id) AS (WITH query AS ({}) SELECT * FROM query)", query) + ids_ = ids + if ids_ is None: + ids_ = query_ids( + cr, + format_query(cr, "SELECT id FROM {}", table_of_model(cr, model)) if query is None else SQLStr(query), + itersize=2**20, ) - count = cr.rowcount - cr.execute("ALTER TABLE _upgrade_rf ADD CONSTRAINT pk_upgrade_rf_id PRIMARY KEY (id)") - - def get_ids(): - with named_cursor(cr, itersize=2**20) as ncr: - ncr.execute("SELECT id FROM _upgrade_rf ORDER BY id") - for (id_,) in ncr: - yield id_ - - ids_ = get_ids() - else: - count = len(ids) - ids_ = ids + count = len(ids_) if not count: - cr.execute("DROP TABLE IF EXISTS _upgrade_rf") return _logger.info("Computing fields %s of %r on %d records", fields, model, count) @@ -338,7 +327,6 @@ def get_ids(): else: flush(records) invalidate(records) - cr.execute("DROP TABLE IF EXISTS _upgrade_rf") class iter_browse(object): From 30020ce44997ca85fe36078b2047e2f1cf42d421 Mon Sep 17 00:00:00 2001 From: "Carsten Wolff (cawo)" Date: Mon, 24 Nov 2025 14:26:03 +0000 Subject: [PATCH 3/7] [FIX] models.remove_model: MemoryError ``` Traceback (most recent call last): [...] File "/tmp/tmpipxrg2eq/migrations/util/models.py", line 563, in merge_model remove_model(cr, source, drop_table=drop_table, ignore_m2m=ignore_m2m) File "/tmp/tmpipxrg2eq/migrations/util/models.py", line 138, in remove_model it = chunks([id for (id,) in cr.fetchall()], chunk_size, fmt=tuple) MemoryError ``` Some IR tables can be large. 
Avoid `cr.fetchall()` when getting ids by use of pg.query_ids() --- src/util/models.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/util/models.py b/src/util/models.py index c92d42a0b..d3c769b56 100644 --- a/src/util/models.py +++ b/src/util/models.py @@ -27,6 +27,7 @@ get_m2m_tables, get_value_or_en_translation, parallel_execute, + query_ids, table_exists, update_m2m_tables, view_exists, @@ -128,17 +129,17 @@ def remove_model(cr, model, drop_table=True, ignore_m2m=()): 'SELECT id FROM "{}" r WHERE {}'.format(ir.table, ir.model_filter(prefix="r.")), [model] ).decode() - cr.execute(query) - if ir.table == "ir_ui_view": - for (view_id,) in cr.fetchall(): - remove_view(cr, view_id=view_id, silent=True) - else: - # remove in batch - size = (cr.rowcount + chunk_size - 1) / chunk_size - it = chunks([id for (id,) in cr.fetchall()], chunk_size, fmt=tuple) - for sub_ids in log_progress(it, _logger, qualifier=ir.table, size=size): - remove_records(cr, ref_model, sub_ids) - _rm_refs(cr, ref_model, sub_ids) + with query_ids(cr, query, itersize=chunk_size) as ids_: + if ir.table == "ir_ui_view": + for view_id in ids_: + remove_view(cr, view_id=view_id, silent=True) + else: + # remove in batch + size = (len(ids_) + chunk_size - 1) / chunk_size + it = chunks(ids_, chunk_size, fmt=tuple) + for sub_ids in log_progress(it, _logger, qualifier=ir.table, size=size): + remove_records(cr, ref_model, sub_ids) + _rm_refs(cr, ref_model, sub_ids) if ir.set_unknown: # Link remaining records not linked to a XMLID From 2162f8e3ac0c3638d6e6acc393feca62dfce624b Mon Sep 17 00:00:00 2001 From: "Carsten Wolff (cawo)" Date: Fri, 12 Sep 2025 07:23:19 +0000 Subject: [PATCH 4/7] [IMP] orm: iter_browse accept generator or query as ids This allows the caller to be memory efficient on huge numbers of ids, allowing for even more millions of records to be browsed. --- src/util/orm.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/util/orm.py b/src/util/orm.py index 586911d21..6f9ede06b 100644 --- a/src/util/orm.py +++ b/src/util/orm.py @@ -362,7 +362,9 @@ class iter_browse(object): :param model: the model to iterate :type model: :class:`odoo.model.Model` - :param list(int) ids: list of IDs of the records to iterate + :param iterable(int) ids: iterable of IDs of the records to iterate + :param str query: alternative to ids, SQL query that can produce them. + Can also be a DML statement with a RETURNING clause. :param int chunk_size: number of records to load in each iteration chunk, `200` by default :param bool yield_chunks: when iterating, yield records in chunks of `chunk_size` instead of one by one. 
@@ -377,14 +379,27 @@ class iter_browse(object): See also :func:`~odoo.upgrade.util.orm.env` """ - __slots__ = ("_chunk_size", "_cr_uid", "_it", "_logger", "_model", "_patch", "_size", "_strategy", "_yield_chunks") + __slots__ = ( + "_chunk_size", + "_cr_uid", + "_ids", + "_it", + "_logger", + "_model", + "_patch", + "_query", + "_size", + "_strategy", + "_yield_chunks", + ) def __init__(self, model, *args, **kw): assert len(args) in [1, 3] # either (cr, uid, ids) or (ids,) self._model = model self._cr_uid = args[:-1] - ids = args[-1] - self._size = len(ids) + self._ids = args[-1] + self._size = kw.pop("size", None) + self._query = kw.pop("query", None) self._chunk_size = kw.pop("chunk_size", 200) # keyword-only argument self._yield_chunks = kw.pop("yield_chunks", False) self._logger = kw.pop("logger", _logger) @@ -393,8 +408,19 @@ def __init__(self, model, *args, **kw): if kw: raise TypeError("Unknown arguments: %s" % ", ".join(kw)) + if not (self._ids is None) ^ (self._query is None): + raise TypeError("Must be initialized using exactly one of `ids` or `query`") + + if self._query: + self._ids = query_ids(self._model.env.cr, self._query, itersize=self._chunk_size) + + if not self._size: + try: + self._size = len(self._ids) + except TypeError: + raise ValueError("When passing ids as a generator, the size kwarg is mandatory") self._patch = None - self._it = chunks(ids, self._chunk_size, fmt=self._browse) + self._it = chunks(self._ids, self._chunk_size, fmt=self._browse) def _browse(self, ids): next(self._end(), None) From ad0d9e5b028037aa16b6a03a86507fb84c41c098 Mon Sep 17 00:00:00 2001 From: "Carsten Wolff (cawo)" Date: Tue, 30 Sep 2025 10:14:28 +0000 Subject: [PATCH 5/7] [IMP] orm: iter_browse.create() accept generator or query as values Done to be able to create millions of records memory-efficiently. --- src/util/orm.py | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/util/orm.py b/src/util/orm.py index 6f9ede06b..8638f9a85 100644 --- a/src/util/orm.py +++ b/src/util/orm.py @@ -42,7 +42,7 @@ from .exceptions import MigrationError from .helpers import table_of_model from .misc import chunks, log_progress, version_between, version_gte -from .pg import SQLStr, column_exists, format_query, get_columns, query_ids +from .pg import SQLStr, column_exists, format_query, get_columns, named_cursor, query_ids # python3 shims try: @@ -422,6 +422,19 @@ def __init__(self, model, *args, **kw): self._patch = None self._it = chunks(self._ids, self._chunk_size, fmt=self._browse) + def _values_query(self, query): + cr = self._model.env.cr + cr.execute(format_query(cr, "WITH query AS ({}) SELECT count(*) FROM query", SQLStr(query))) + size = cr.fetchone()[0] + + def get_values(): + with named_cursor(cr, itersize=self._chunk_size) as ncr: + ncr.execute(SQLStr(query)) + for row in ncr.iterdict(): + yield row + + return size, get_values() + def _browse(self, ids): next(self._end(), None) args = self._cr_uid + (list(ids),) @@ -473,35 +486,47 @@ def caller(*args, **kwargs): self._it = None return caller - def create(self, values, **kw): + def create(self, values=None, query=None, **kw): """ Create records. An alternative to the default `create` method of the ORM that is safe to use to create millions of records. - :param list(dict) values: list of values of the records to create + :param iterable(dict) values: iterable of values of the records to create + :param int size: the no. 
of elements produced by values, required if values is a generator + :param str query: alternative to values, SQL query that can produce them. + *No* DML statements allowed. Only SELECT. :param bool multi: whether to use the multi version of `create`, by default is `True` from Odoo 12 and above """ multi = kw.pop("multi", version_gte("saas~11.5")) + size = kw.pop("size", None) if kw: raise TypeError("Unknown arguments: %s" % ", ".join(kw)) - if not values: - raise ValueError("`create` cannot be called with an empty `values` argument") + if not (values is None) ^ (query is None): + raise ValueError("`create` needs to be called using exactly one of `values` or `query` arguments") if self._size: raise ValueError("`create` can only called on empty `browse_record` objects.") - ids = [] - size = len(values) + if query: + size, values = self._values_query(query) + + if size is None: + try: + size = len(values) + except TypeError: + raise ValueError("When passing a generator of values, the size kwarg is mandatory") + it = chunks(values, self._chunk_size, fmt=list) if self._logger: sz = (size + self._chunk_size - 1) // self._chunk_size qualifier = "env[%r].create([:%d])" % (self._model._name, self._chunk_size) it = log_progress(it, self._logger, qualifier=qualifier, size=sz) + ids = [] self._patch = no_selection_cache_validation() for sub_values in it: self._patch.start() From ede78cd1fd0b5d085d8bed51dd4625f021f4cf8a Mon Sep 17 00:00:00 2001 From: "Carsten Wolff (cawo)" Date: Fri, 12 Sep 2025 07:31:23 +0000 Subject: [PATCH 6/7] [IMP] orm: add optional parallelism to iter_browse.__attr__() In some cases, e.g. if it is known that calling a certain method on the model will only trigger inserts or it is clear that updates will be disjunct, such method calls can be done in parallel. --- src/util/orm.py | 99 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 12 deletions(-) diff --git a/src/util/orm.py b/src/util/orm.py index 8638f9a85..d32df6f80 100644 --- a/src/util/orm.py +++ b/src/util/orm.py @@ -9,13 +9,22 @@ on this module work along the ORM of *all* supported versions. 
""" +import collections import logging +import multiprocessing +import os import re +import sys from contextlib import contextmanager from functools import wraps -from itertools import chain +from itertools import chain, repeat from textwrap import dedent +try: + from concurrent.futures import ProcessPoolExecutor +except ImportError: + ProcessPoolExecutor = None + try: from unittest.mock import patch except ImportError: @@ -27,9 +36,9 @@ except ImportError: from odoo import SUPERUSER_ID from odoo import fields as ofields - from odoo import modules, release + from odoo import modules, release, sql_db except ImportError: - from openerp import SUPERUSER_ID, modules, release + from openerp import SUPERUSER_ID, modules, release, sql_db try: from openerp import fields as ofields @@ -41,8 +50,8 @@ from .const import BIG_TABLE_THRESHOLD from .exceptions import MigrationError from .helpers import table_of_model -from .misc import chunks, log_progress, version_between, version_gte -from .pg import SQLStr, column_exists, format_query, get_columns, named_cursor, query_ids +from .misc import chunks, log_progress, str2bool, version_between, version_gte +from .pg import SQLStr, column_exists, format_query, get_columns, get_max_workers, named_cursor, query_ids # python3 shims try: @@ -52,6 +61,10 @@ _logger = logging.getLogger(__name__) +UPG_PARALLEL_ITER_BROWSE = str2bool(os.environ.get("UPG_PARALLEL_ITER_BROWSE", "0")) +# FIXME: for CI! Remove before merge +UPG_PARALLEL_ITER_BROWSE = True + def env(cr): """ @@ -329,6 +342,21 @@ def recompute_fields(cr, model, fields, ids=None, logger=_logger, chunk_size=256 invalidate(records) +def _mp_iter_browse_cb(ids_or_values, params): + me = _mp_iter_browse_cb + # init upon first call. Done here instead of initializer callback, because py3.6 doesn't have it + if not hasattr(me, "env"): + sql_db._Pool = None # children cannot borrow from copies of the same pool, it will cause protocol error + me.env = env(sql_db.db_connect(params["dbname"]).cursor()) + me.env.clear() + # process + if params["mode"] == "browse": + getattr( + me.env[params["model_name"]].with_context(params["context"]).browse(ids_or_values), params["attr_name"] + )(*params["args"], **params["kwargs"]) + me.env.cr.commit() + + class iter_browse(object): """ Iterate over recordsets. @@ -390,6 +418,7 @@ class iter_browse(object): "_query", "_size", "_strategy", + "_superchunk_size", "_yield_chunks", ) @@ -402,9 +431,30 @@ def __init__(self, model, *args, **kw): self._query = kw.pop("query", None) self._chunk_size = kw.pop("chunk_size", 200) # keyword-only argument self._yield_chunks = kw.pop("yield_chunks", False) + self._superchunk_size = self._chunk_size self._logger = kw.pop("logger", _logger) self._strategy = kw.pop("strategy", "flush") - assert self._strategy in {"flush", "commit"} + assert self._strategy in {"flush", "commit", "multiprocessing"} + if self._strategy == "multiprocessing": + if not ProcessPoolExecutor: + raise ValueError("multiprocessing strategy can not be used in scripts run by python2") + if UPG_PARALLEL_ITER_BROWSE: + self._superchunk_size = min(get_max_workers() * 10 * self._chunk_size, 1000000) + else: + self._strategy = "commit" # downgrade + if self._size > 100000: + _logger.warning( + "Browsing %d %s, which may take a long time. " + "This can be sped up by setting the env variable UPG_PARALLEL_ITER_BROWSE to 1. 
" + "If you do, be sure to examine the results carefully.", + self._size, + self._model._name, + ) + else: + _logger.info( + "Caller requested multiprocessing strategy, but UPG_PARALLEL_ITER_BROWSE env var is not set. " + "Downgrading strategy to commit.", + ) if kw: raise TypeError("Unknown arguments: %s" % ", ".join(kw)) @@ -412,7 +462,7 @@ def __init__(self, model, *args, **kw): raise TypeError("Must be initialized using exactly one of `ids` or `query`") if self._query: - self._ids = query_ids(self._model.env.cr, self._query, itersize=self._chunk_size) + self._ids = query_ids(self._model.env.cr, self._query, itersize=self._superchunk_size) if not self._size: try: @@ -445,7 +495,7 @@ def _browse(self, ids): return self._model.browse(*args) def _end(self): - if self._strategy == "commit": + if self._strategy in ["commit", "multiprocessing"]: self._model.env.cr.commit() else: flush(self._model) @@ -473,18 +523,43 @@ def __getattr__(self, attr): if not callable(getattr(self._model, attr)): raise TypeError("The attribute %r is not callable" % attr) - it = self._it + it = chunks(self._ids, self._superchunk_size, fmt=self._browse) if self._logger: - sz = (self._size + self._chunk_size - 1) // self._chunk_size - qualifier = "%s[:%d]" % (self._model._name, self._chunk_size) + sz = (self._size + self._superchunk_size - 1) // self._superchunk_size + qualifier = "%s[:%d]" % (self._model._name, self._superchunk_size) it = log_progress(it, self._logger, qualifier=qualifier, size=sz) def caller(*args, **kwargs): args = self._cr_uid + args return [getattr(chnk, attr)(*args, **kwargs) for chnk in chain(it, self._end())] + def caller_multiprocessing(*args, **kwargs): + params = { + "dbname": self._model.env.cr.dbname, + "model_name": self._model._name, + # convert to dict for pickle. Will still break if any value in the context is not pickleable + "context": dict(self._model.env.context), + "attr_name": attr, + "args": self._cr_uid + args, + "kwargs": kwargs, + "mode": "browse", + } + self._model.env.cr.commit() + extrakwargs = {"mp_context": multiprocessing.get_context("fork")} if sys.version_info >= (3, 7) else {} + with ProcessPoolExecutor(max_workers=get_max_workers(), **extrakwargs) as executor: + for chunk in it: + collections.deque( + executor.map( + _mp_iter_browse_cb, chunks(chunk._ids, self._chunk_size, fmt=tuple), repeat(params) + ), + maxlen=0, + ) + next(self._end(), None) + # do not return results in // mode, we expect it to be used for huge numbers of + # records and thus would risk MemoryError, also we cannot know if what attr returns is pickleable + self._it = None - return caller + return caller_multiprocessing if self._strategy == "multiprocessing" else caller def create(self, values=None, query=None, **kw): """ From a0538d79afd00f7e1f61e1cac9b1d9bf265f9f1f Mon Sep 17 00:00:00 2001 From: "Carsten Wolff (cawo)" Date: Tue, 30 Sep 2025 07:43:06 +0000 Subject: [PATCH 7/7] [IMP] orm: add optional parallelism to iter_browse.create() Like the same support added to `__attr__` in the parent commit, this can only be used by callers when it is known that database modifications will be distinct, not causing concurrency issues or side-effects on the results. `create` returns an `iter_browse` object for the caller to browse created records. With the multiprocessing strategy, we make the following changes to it: - To support vast amounts of created records in multiprocessing strategy, we process values in a generator and initialize the returned `iter_browse` object with it. 
As this requires the caller of `create` to always consume/iterate the result (otherwise records will not be created), it is not applied to the other strategies as it would break existing API. - make __iter__ yield chunks if strategy is multiprocessing. This way, a caller can process chunks of freshly created records `for records in util.iter_browse(strategy="multiprocessing").create(SQLStr)` and since everything from input to output is a generator, will be perfectly memory efficient. - do not pass the logger to the returned `iter_browse` object from `create`, if the strategy is multiprocessing, because it will lead to interleaved logging from the input generator and this one when the caller iterates it. --- src/util/orm.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/util/orm.py b/src/util/orm.py index d32df6f80..9ee1fd70e 100644 --- a/src/util/orm.py +++ b/src/util/orm.py @@ -354,7 +354,12 @@ def _mp_iter_browse_cb(ids_or_values, params): getattr( me.env[params["model_name"]].with_context(params["context"]).browse(ids_or_values), params["attr_name"] )(*params["args"], **params["kwargs"]) + if params["mode"] == "create": + new_ids = me.env[params["model_name"]].with_context(params["context"]).create(ids_or_values).ids me.env.cr.commit() + if params["mode"] == "create": + return new_ids + return None class iter_browse(object): @@ -595,6 +600,12 @@ def create(self, values=None, query=None, **kw): except TypeError: raise ValueError("When passing a generator of values, the size kwarg is mandatory") + if self._strategy == "multiprocessing": + return self._create_multiprocessing(values, size, multi) + + return self._create(values, size, multi) + + def _create(self, values, size, multi): it = chunks(values, self._chunk_size, fmt=list) if self._logger: sz = (size + self._chunk_size - 1) // self._chunk_size @@ -620,6 +631,48 @@ def create(self, values=None, query=None, **kw): self._model, *args, chunk_size=self._chunk_size, logger=self._logger, strategy=self._strategy ) + def _create_multiprocessing(self, values, size, multi): + if not multi: + raise ValueError("The multiprocessing strategy only supports the multi version of `create`") + + it = chunks(values, self._superchunk_size, fmt=list) + if self._logger: + sz = (size + self._superchunk_size - 1) // self._superchunk_size + qualifier = "env[%r].create([:%d])" % (self._model._name, self._superchunk_size) + it = log_progress(it, self._logger, qualifier=qualifier, size=sz) + + def iter_proc(): + params = { + "dbname": self._model.env.cr.dbname, + "model_name": self._model._name, + # convert to dict for pickle. 
Will still break if any value in the context is not pickleable + "context": dict(self._model.env.context), + "mode": "create", + } + self._model.env.cr.commit() + self._patch.start() + extrakwargs = {"mp_context": multiprocessing.get_context("fork")} if sys.version_info >= (3, 7) else {} + with ProcessPoolExecutor(max_workers=get_max_workers(), **extrakwargs) as executor: + for sub_values in it: + for task_result in executor.map( + _mp_iter_browse_cb, chunks(sub_values, self._chunk_size, fmt=tuple), repeat(params) + ): + self._model.env.cr.commit() # make task_result visible on main cursor before yielding ids + for new_id in task_result: + yield new_id + next(self._end(), None) + + self._patch = no_selection_cache_validation() + args = self._cr_uid + (iter_proc(),) + kwargs = { + "size": size, + "chunk_size": self._chunk_size, + "logger": None, + "strategy": self._strategy, + "yield_chunks": self._yield_chunks, + } + return iter_browse(self._model, *args, **kwargs) + @contextmanager def custom_module_field_as_manual(env, rollback=True, do_flush=False):
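
Taken together, the series lets id production, browsing and record creation all stay
out of Python memory. A rough sketch of the intended call patterns (model names,
queries, the staging table and the `cr` in scope are illustrative assumptions, not
part of the series):

```python
env = util.env(cr)

# Call a method over a huge recordset whose ids come straight from a query.
# With UPG_PARALLEL_ITER_BROWSE=1 the chunks are fanned out to worker processes;
# without it the strategy is downgraded to "commit".
util.iter_browse(
    env["res.partner"],
    None,  # ids are not passed explicitly, they are produced by `query`
    query="SELECT id FROM res_partner WHERE active",
    chunk_size=200,
    strategy="multiprocessing",
).write({"lang": "en_US"})

# Create records from a SELECT: each row becomes one create() values dict, and the
# returned iter_browse yields the new records chunk by chunk, so nothing large is
# ever accumulated in memory.
total = 0
new_banks = util.iter_browse(env["res.partner.bank"], [], strategy="multiprocessing").create(
    query="SELECT partner_id, acc_number FROM _upgrade_bank_staging"  # illustrative staging table
)
for records in new_banks:
    total += len(records)
```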