diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f58827d..103f1a9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,3 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - name: Run search_api2 tests on: @@ -21,6 +18,9 @@ on: jobs: build: runs-on: ubuntu-latest + env: + ELASTICSEARCH_AUTH_USERNAME: elastic + ELASTICSEARCH_AUTH_PASSWORD: changeme steps: - name: Check out GitHub repo diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f74a0c..bb2436d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,12 +11,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Upgraded Python to version 3.9.19 in test workflows and Dockerfile - Updated integration tests README file +- Required tests to now use authentication against elasticsearch ### Fixed - Container/service shutdown issues; all unit and integration tests now pass locally ### Security - Vendored `kbase-jsonrpcbase` 0.3.0a6 and `jsonrpc11base` to resolve dependency conflicts +- Now requires credentials for connection to elasticsearch + ## [1.0.0] - 2021-04-20 ### Fixed diff --git a/docker-compose.yaml b/docker-compose.yaml index e8b4e5e..574c409 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -17,6 +17,8 @@ services: - DEVELOPMENT=1 - PYTHONUNBUFFERED=true - ELASTICSEARCH_URL=http://elasticsearch:9200 + - ELASTICSEARCH_AUTH_USERNAME=elastic + - ELASTICSEARCH_AUTH_PASSWORD=changeme - WORKERS=2 elasticsearch: @@ -25,7 +27,8 @@ services: - "ES_JAVA_OPTS=-Xms512m -Xmx512m" - bootstrap.memory_lock=true - discovery.type=single-node - - xpack.security.enabled=false + - xpack.security.enabled=true + - ELASTIC_PASSWORD=changeme ports: - "127.0.0.1:9200:9200" - "127.0.0.1:9300:9300" diff --git a/src/es_client/query.py 
def auth_header_encoder(username, password) -> str:
    """
    Build the value of an HTTP Basic ``Authorization`` header.

    The credentials are joined as ``username:password``, UTF-8 encoded,
    base64 encoded and prefixed with ``Basic ``.

    Args:
        username: user name (str); must be non-empty and must not contain
            a colon.
        password: password (str); must be non-empty. Colons are allowed.

    Returns:
        The header value, e.g. ``"Basic ZWxhc3RpYzpjaGFuZ2VtZQ=="`` for
        ``elastic`` / ``changeme``.

    Raises:
        RuntimeError: if either credential is missing or empty, or if the
            username contains a colon.
    """
    if not (username and password):
        raise RuntimeError(
            "Elasticsearch authentication credentials are required. "
            "Set ELASTICSEARCH_AUTH_USERNAME and ELASTICSEARCH_AUTH_PASSWORD environment variables."
        )
    # In Basic auth the FIRST colon separates user-id from password, so a
    # colon inside the username would make the server split the credentials
    # in the wrong place. Fail loudly here instead of sending a header that
    # can only ever be rejected.
    if ':' in username:
        raise RuntimeError(
            "Elasticsearch username must not contain a colon (':'). "
            "Check the ELASTICSEARCH_AUTH_USERNAME environment variable."
        )
    token = base64.b64encode(f"{username}:{password}".encode('utf-8')).decode('ascii')
    return f"Basic {token}"
{ 'dev': bool(os.environ.get('DEVELOPMENT')), 'global': global_config, 'elasticsearch_url': es_url, + 'elasticsearch_headers': elasticsearch_headers, 'index_prefix': index_prefix, 'prefix_delimiter': prefix_delimiter, 'suffix_delimiter': suffix_delimiter, diff --git a/src/utils/wait_for_service.py b/src/utils/wait_for_service.py index 6eff310..a21ad87 100644 --- a/src/utils/wait_for_service.py +++ b/src/utils/wait_for_service.py @@ -7,12 +7,12 @@ WAIT_POLL_INTERVAL = 5 -def wait_for_service(url, name, timeout=DEFAULT_TIMEOUT): +def wait_for_service(url, name, headers, timeout=DEFAULT_TIMEOUT): start = time.time() while True: logger.info(f'Attempting to connect to {name} at {url}') try: - requests.get(url, timeout=timeout).raise_for_status() + requests.get(url, timeout=timeout, headers=headers).raise_for_status() logger.info(f'{name} is online!') break except Exception: diff --git a/tests/helpers/init_elasticsearch.py b/tests/helpers/init_elasticsearch.py index dea80f0..fbdae22 100644 --- a/tests/helpers/init_elasticsearch.py +++ b/tests/helpers/init_elasticsearch.py @@ -3,6 +3,7 @@ from src.utils.config import config + # TODO use a util for creating index names narrative_index_name = ''.join([ config['index_prefix'], @@ -50,36 +51,8 @@ ] -def init_elasticsearch(): - """ - Initialize the indexes and documents on elasticsearch before running tests. - """ - global _COMPLETED - if _COMPLETED: - return - for index_name in index_names: - create_index(index_name) - create_index(narrative_index_name) - for index_name in index_names: - for doc in test_docs: - create_doc(index_name, doc) - for doc in narrative_docs: - create_doc(narrative_index_name, doc) - # create default_search alias for all fields. 
def create_doc(index_name, data):
    """
    Store one test document in the given index.

    The document id is taken from ``data['name']``. The request is sent
    with ``refresh=wait_for`` so that the call waits for the document to
    sync. Raises RuntimeError if the response is not OK.
    """
    path_parts = (_ES_URL, index_name, '_doc', data['name'], '?refresh=wait_for')
    doc_url = '/'.join(path_parts)
    response = requests.put(
        doc_url,
        data=json.dumps(data),
        headers=config['elasticsearch_headers'],
    )
    if response.ok:
        return
    raise RuntimeError(f"Error creating test doc:\n{response.text}")


def init_elasticsearch():
    """
    Populate elasticsearch with the test indexes, documents and alias.

    Runs at most once per process: the module-level ``_COMPLETED`` flag is
    set after a successful run and later calls return immediately.
    """
    global _COMPLETED
    if _COMPLETED:
        return

    # Indexes first (regular ones, then the narrative index) ...
    for name in [*index_names, narrative_index_name]:
        create_index(name)

    # ... then the documents that live in them.
    for name in index_names:
        for document in test_docs:
            create_doc(name, document)
    for document in narrative_docs:
        create_doc(narrative_index_name, document)

    # Finally, create the default_search alias spanning the regular indexes.
    aliases_url = f"{_ES_URL}/_aliases"
    alias = ''.join([config['index_prefix'], config['prefix_delimiter'], "default_search"])
    actions = {"actions": [{"add": {"indices": index_names, "alias": alias}}]}
    response = requests.post(
        aliases_url,
        data=json.dumps(actions),
        headers=config['elasticsearch_headers'],
    )
    if not response.ok:
        raise RuntimeError("Error creating aliases on ES:", response.text)
    _COMPLETED = True
@pytest.mark.parametrize("username,password", [
    # missing values
    (None, None), (None, 'password'), ('username', None),
    # empty strings
    ('', ''), ('', 'password'), ('username', ''),
])
def test_auth_header_encoder_missing_credentials(username, password):
    """A missing or empty username/password must be rejected with RuntimeError."""
    expected_message = "Elasticsearch authentication credentials are required"
    with pytest.raises(RuntimeError, match=expected_message):
        auth_header_encoder(username, password)