Skip to content

Commit 7e47b02

Browse files
authored
fix: add fix for slow full count in postgresql (#2174)
* feat: implement optional count for postgresql provider This work has been done to allow the result count to be enabled or disabled for the PostgreSQL provider. By disabling the count you can get improved performance on large datasets, but on smaller datasets this is unlikely to have any effect. * fix: move count to base provider This work has been done to move count to the base provider. While doing this work I also added a debug log message to state when the count had been disabled in the SQL provider. Also, I removed some tests that were no longer needed after the introduction of the str2bool function, when getting the count value from the configuration file. * fix: convert string true to boolean
1 parent 68f5503 commit 7e47b02

File tree

4 files changed

+48
-7
lines changed

4 files changed

+48
-7
lines changed

docs/source/publishing/ogcapi-features.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ Must have PostGIS installed.
625625
id_field: osm_id
626626
table: hotosm_bdi_waterways
627627
geom_field: foo_geom
628+
count: true # Optional; Default true; Enable/disable count for improved performance.
628629
629630
A number of database connection options can also be configured in the provider in order to properly adjust the SQLAlchemy engine client.
630631
These are optional and if not specified, the default from the engine will be used. Please see also `SQLAlchemy docs <https://docs.sqlalchemy.org/en/14/core/engines.html#custom-dbapi-connect-arguments-on-connect-routines>`_.
@@ -662,6 +663,7 @@ These are optional and if not specified, the default from the engine will be use
662663
id_field: osm_id
663664
table: hotosm_bdi_waterways
664665
geom_field: foo_geom
666+
count: true # Optional; Default true; Enable/disable count for improved performance.
665667
666668
The PostgreSQL provider is also able to connect to Cloud SQL databases.
667669

@@ -677,6 +679,7 @@ The PostgreSQL provider is also able to connect to Cloud SQL databases.
677679
password: postgres
678680
id_field: id
679681
table: states
682+
count: true # Optional; Default true; Enable/disable count for improved performance.
680683
681684
This is what a configuration for `Google Cloud SQL`_ connection looks like. The ``host``
682685
block contains the necessary socket connection information.

pygeoapi/provider/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ def __init__(self, provider_def):
5757
:returns: pygeoapi.provider.base.BaseProvider
5858
"""
5959

60+
from pygeoapi.util import str2bool
61+
6062
try:
6163
self.name = provider_def['name']
6264
self.type = provider_def['type']
@@ -65,6 +67,7 @@ def __init__(self, provider_def):
6567
raise RuntimeError('name/type/data are required')
6668

6769
self.editable = provider_def.get('editable', False)
70+
self.count = str2bool(provider_def.get('count', True))
6871
self.options = provider_def.get('options')
6972
self.id_field = provider_def.get('id_field')
7073
self.uri_field = provider_def.get('uri_field')

pygeoapi/provider/sql.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@
9191
ProviderQueryError,
9292
ProviderItemNotFoundError
9393
)
94-
from pygeoapi.util import str2bool
9594

9695
LOGGER = logging.getLogger(__name__)
9796

@@ -128,7 +127,6 @@ def __init__(
128127
self.id_field = provider_def['id_field']
129128
self.geom = provider_def.get('geom_field', 'geom')
130129
self.driver_name = driver_name
131-
self.count = str2bool(provider_def.get('count', True))
132130

133131
LOGGER.debug(f'Name: {self.name}')
134132
LOGGER.debug(f'Table: {self.table}')
@@ -214,18 +212,20 @@ def query(
214212
.options(selected_properties)
215213
)
216214

217-
matched = results.count()
218-
219-
LOGGER.debug(f'Found {matched} result(s)')
220-
221215
LOGGER.debug('Preparing response')
222216
response = {
223217
'type': 'FeatureCollection',
224218
'features': [],
225-
'numberMatched': matched,
226219
'numberReturned': 0
227220
}
228221

222+
if self.count or resulttype == 'hits':
223+
matched = results.count()
224+
response['numberMatched'] = matched
225+
LOGGER.debug(f'Found {matched} result(s)')
226+
else:
227+
LOGGER.debug('Count disabled')
228+
229229
if resulttype == 'hits' or not results:
230230
return response
231231

tests/provider/test_postgresql_provider.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,3 +908,38 @@ def test_transaction_create_handles_invalid_input_data(pg_api_, data):
908908
headers, code, content = manage_collection_item(
909909
pg_api_, req, action='create', dataset='hot_osm_waterways')
910910
assert 'generic error' in content
911+
912+
913+
def test_provider_count_default_value(config):
914+
# Arrange
915+
provider = PostgreSQLProvider(config)
916+
917+
# Act
918+
results = provider.query()
919+
920+
# Assert
921+
assert results['numberMatched'] == 14776
922+
923+
924+
def test_provider_count_false(config):
925+
# Arrange
926+
config['count'] = 'false'
927+
provider = PostgreSQLProvider(config)
928+
929+
# Act
930+
results = provider.query()
931+
932+
# Assert
933+
assert 'numberMatched' not in results
934+
935+
936+
def test_provider_count_false_with_resulttype_hits(config):
937+
# Arrange
938+
config['count'] = 'false'
939+
provider = PostgreSQLProvider(config)
940+
941+
# Act
942+
results = provider.query(resulttype="hits")
943+
944+
# Assert
945+
assert results['numberMatched'] == 14776

0 commit comments

Comments
 (0)