Orbitalize · barroco · Mar 22, 2023 · Mar 16, 2023 · May 3, 2023 · May 3, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -4,6 +4,8 @@ on:
   push:
     branches:
     - main
+  schedule:
+    - cron: '23 * * * *'
 jobs:
   hygiene-tests:
     name: Repository hygiene
@@ -55,8 +57,20 @@ jobs:
         cd monitoring/mock_uss
         make test
     - name: uss_qualifier tests
+      env:
+        NEW_RELIC_ACCOUNT_ID: ${{ secrets.NEW_RELIC_ACCOUNT_ID }}
+        NRIA_LICENSE_KEY: ${{ secrets.NRIA_LICENSE_KEY }}
+        NEW_RELIC_API_KEY: ${{ secrets.NEW_RELIC_API_KEY }}
+#        NEW_RELIC_LICENSE_KEY: ${{ secrets.NRIA_LICENSE_KEY }}
+        NEW_RELIC_APP_NAME: ci-monitoring-uss_qualifier
       run: |
         cd monitoring/uss_qualifier
+        # Capture traffic in the background while the tests run; sntp.cap is
+        # uploaded as an artifact when the job fails. tcpdump is never stopped
+        # explicitly, so -U writes each packet out immediately instead of
+        # leaving it in a buffer that may not be flushed before the upload.
+        sudo tcpdump -nn -i any -U -w sntp.cap &
+        sleep 1
         make test
     - name: prober tests
       run: |
@@ -76,5 +90,16 @@ jobs:
       with:
         name: monitoring-tests-reports
         path: |
+          monitoring/uss_qualifier/report.gv
+          monitoring/uss_qualifier/report.json
+          monitoring/uss_qualifier/tested_requirements.html
+          monitoring/prober/prober_test_results.xml
+    - name: Save tcpdump output on failure
+      if: failure()
+      uses: actions/upload-artifact@v3
+      with:
+        name: monitoring-tests-tcpdump
+        path: |
+          monitoring/uss_qualifier/sntp.cap
           monitoring/uss_qualifier/output
           monitoring/prober/output
diff --git a/monitoring/Dockerfile b/monitoring/Dockerfile
@@ -12,6 +12,12 @@
 FROM python:3.8-slim
 # Not -alpine because: https://stackoverflow.com/a/58028091/651139
 
-RUN apt-get update && apt-get install -y openssl curl libgeos-dev gcc && apt-get install ca-certificates
+# Install every system package in one layer and remove the apt lists in that
+# same layer, instead of installing openssl/curl/ca-certificates twice.
+RUN apt-get update && apt-get install -y \
+    openssl curl ca-certificates \
+    libgeos-dev gcc \
+    && rm -rf /var/lib/apt/lists/*
+
 RUN mkdir -p /app/monitoring
 COPY ./requirements.txt /app/monitoring/requirements.txt
@@ -34,4 +40,8 @@ ARG version
 ARG commit_hash
 ENV MONITORING_VERSION=$version
 ENV GIT_COMMIT_HASH=$commit_hash
-ENTRYPOINT []
+# Wrap every container command with the New Relic agent, configured by /app/newrelic.ini.
+RUN pip install --no-cache-dir newrelic
+COPY ./monitoring/newrelic.ini /app/newrelic.ini
+ENV NEW_RELIC_CONFIG_FILE="/app/newrelic.ini"
+ENTRYPOINT ["newrelic-admin", "run-program"]
diff --git a/monitoring/mock_uss/run_locally_atproxy_client.sh b/monitoring/mock_uss/run_locally_atproxy_client.sh
@@ -19,7 +19,7 @@ ATPROXY_BASIC_AUTH="local_client:local_client"
 ATPROXY_BASE_URL="http://${MOCK_USS_TOKEN_AUDIENCE:-host.docker.internal}:${ATPROXY_PORT}"
 
 if [ "$CI" == "true" ]; then
-  docker_args="--add-host host.docker.internal:host-gateway" # Required to reach other containers in Ubuntu (used for Github Actions)
+  docker_args="--add-host host.docker.internal:host-gateway --network dss_sandbox_default" # Required to reach other containers in Ubuntu (used for Github Actions)
 else
   docker_args="-it"
 fi

diff --git a/monitoring/mock_uss/run_locally_scdsc.sh b/monitoring/mock_uss/run_locally_scdsc.sh
@@ -15,7 +15,7 @@ PORT=8074
 BASE_URL="http://${MOCK_USS_TOKEN_AUDIENCE:-host.docker.internal}:${PORT}"
 
 if [ "$CI" == "true" ]; then
-  docker_args="--add-host host.docker.internal:host-gateway" # Required to reach other containers in Ubuntu (used for Github Actions)
+  docker_args="--add-host host.docker.internal:host-gateway --network dss_sandbox_default" # Required to reach other containers in Ubuntu (used for Github Actions)
 else
   docker_args="-it"
 fi

diff --git a/monitoring/mock_uss/start_all_local_mocks.sh b/monitoring/mock_uss/start_all_local_mocks.sh
@@ -12,6 +12,21 @@ else
 fi
 cd "${BASEDIR}/../.." || exit 1
 
+if [ -n "${NRIA_LICENSE_KEY}" ]; then
+#  curl -Ls https://download.newrelic.com/install/newrelic-cli/scripts/install.sh | bash && sudo NEW_RELIC_API_KEY="${NEW_RELIC_API_KEY}" NEW_RELIC_ACCOUNT_ID="${NEW_RELIC_ACCOUNT_ID}" NEW_RELIC_REGION=EU /usr/local/bin/newrelic install -y
+  docker run \
+  -d \
+  --name newrelic-infra \
+  --network=host \
+  --cap-add=SYS_PTRACE \
+  --privileged \
+  --pid=host \
+  -v "/:/host:ro" \
+  -v "/var/run/docker.sock:/var/run/docker.sock" \
+  -e NRIA_LICENSE_KEY="${NRIA_LICENSE_KEY}" \
+  newrelic/infrastructure:latest
+fi
+
 monitoring/mock_uss/run_locally_scdsc.sh -d
 export DO_NOT_BUILD_MONITORING=true
 monitoring/mock_uss/run_locally_ridsp.sh -d

diff --git a/monitoring/mock_uss/wait_for_mock_uss.sh b/monitoring/mock_uss/wait_for_mock_uss.sh
@@ -48,6 +48,7 @@ while true; do
       if [ $n_delays -gt $max_delays ]; then
         echo ""
         echo "Mock USS container ${MOCK_USS_CONTAINER} did not become healthy in a reasonable amount of time"
+        docker container inspect -f '{{json .State.Health}}' "${MOCK_USS_CONTAINER}"
         exit 1
       fi
     fi

diff --git a/monitoring/monitorlib/fetch/__init__.py b/monitoring/monitorlib/fetch/__init__.py
@@ -12,7 +12,7 @@
 
 from implicitdict import ImplicitDict, StringBasedDateTime
 from monitoring.monitorlib import infrastructure
-
+from loguru import logger
 
 TIMEOUTS = (5, 25)  # Timeouts of `connect` and `read` in seconds
 
@@ -202,6 +202,7 @@ def query_and_describe(
         return describe_query(client.request(verb, url, **req_kwargs), t0)
     except (requests.RequestException, urllib3.exceptions.ReadTimeoutError) as e:
         msg = "{}: {}".format(type(e).__name__, str(e))
+        logger.error(f"Caught in `query_and_describe`: {msg}")
     t1 = datetime.datetime.utcnow()
 
     # Reconstruct request similar to the one in the query (which is not

diff --git a/monitoring/monitorlib/infrastructure.py b/monitoring/monitorlib/infrastructure.py
@@ -18,7 +18,7 @@
 
 EPOCH = datetime.datetime.utcfromtimestamp(0)
 TOKEN_REFRESH_MARGIN = datetime.timedelta(seconds=15)
-CLIENT_TIMEOUT = 10  # seconds
+CLIENT_TIMEOUT = 30  # seconds
 
 
 class AuthAdapter(object):

diff --git a/monitoring/newrelic.ini b/monitoring/newrelic.ini
@@ -0,0 +1,205 @@
+# ---------------------------------------------------------------------------
+
+#
+# This file configures the New Relic Python Agent.
+#
+# The path to the configuration file should be supplied to the function
+# newrelic.agent.initialize() when the agent is being initialized.
+#
+# The configuration file follows a structure similar to what you would
+# find for Microsoft Windows INI files. For further information on the
+# configuration file format see the Python ConfigParser documentation at:
+#
+#    http://docs.python.org/library/configparser.html
+#
+# For further discussion on the behaviour of the Python agent that can
+# be configured via this configuration file see:
+#
+#    http://newrelic.com/docs/python/python-agent-configuration
+#
+
+# ---------------------------------------------------------------------------
+
+# Here are the settings that are common to all environments.
+
+[newrelic]
+
+# The application name. Set this to be the name of your
+# application as you would like it to show up in New Relic UI.
+# The UI will then auto-map instances of your application into an
+# entry on your home dashboard page.
+app_name = monitoring-uss-qualifier-ci
+
+# New Relic offers distributed tracing for monitoring and analyzing modern
+# distributed systems. Enable distributed tracing.
+distributed_tracing.enabled = true
+
+# When "true", the agent collects performance data about your
+# application and reports this data to the New Relic UI at
+# newrelic.com. This global switch is normally overridden for
+# each environment below.
+monitor_mode = true
+
+# Sets the name of a file to log agent messages to. Useful for
+# debugging any issues with the agent. This is not set by
+# default as it is not known in advance what user your web
+# application processes will run as and where they have
+# permission to write to. Whatever you set this to you must
+# ensure that the permissions for the containing directory and
+# the file itself are correct, and that the user that your web
+# application runs as can write to the file. If not able to
+# write out a log file, it is also possible to say "stderr" and
+# output to standard error output. This would normally result in
+# output appearing in your web server log.
+#log_file = /tmp/newrelic-python-agent.log
+
+# Sets the level of detail of messages sent to the log file, if
+# a log file location has been provided. Possible values, in
+# increasing order of detail, are: "critical", "error", "warning",
+# "info" and "debug". When reporting any agent issues to New
+# Relic technical support, the most useful setting for the
+# support engineers is "debug". However, this can generate a lot
+# of information very quickly, so it is best not to keep the
+# agent at this level for longer than it takes to reproduce the
+# problem you are experiencing.
+log_level = info
+
+# The Python Agent communicates with the New Relic service using
+# SSL by default. Note that this does result in an increase in
+# CPU overhead, over and above what would occur for a non SSL
+# connection, to perform the encryption involved in the SSL
+# communication. This work is though done in a distinct thread
+# to those handling your web requests, so it should not impact
+# response times. You can if you wish revert to using a non SSL
+# connection, but this will result in information being sent
+# over a plain socket connection and will not be as secure.
+ssl = true
+
+# High Security Mode enforces certain security settings, and
+# prevents them from being overridden, so that no sensitive data
+# is sent to New Relic. Enabling High Security Mode means that
+# SSL is turned on, request parameters are not collected, and SQL
+# can not be sent to New Relic in its raw form. To activate High
+# Security Mode, it must be set to 'true' in this local .ini
+# configuration file AND be set to 'true' in the server-side
+# configuration in the New Relic user interface. For details, see
+# https://docs.newrelic.com/docs/subscriptions/high-security
+high_security = false
+
+# The Python Agent will attempt to connect directly to the New
+# Relic service. If there is an intermediate firewall between
+# your host and the New Relic service that requires you to use a
+# HTTP proxy, then you should set both the "proxy_host" and
+# "proxy_port" settings to the required values for the HTTP
+# proxy. The "proxy_user" and "proxy_pass" settings should
+# additionally be set if proxy authentication is implemented by
+# the HTTP proxy. The "proxy_scheme" setting dictates what
+# protocol scheme is used in talking to the HTTP proxy. This
+# would normally always be set as "http" which will result in the
+# agent then using a SSL tunnel through the HTTP proxy for end to
+# end encryption.
+# proxy_scheme = http
+# proxy_host = hostname
+# proxy_port = 8080
+# proxy_user =
+# proxy_pass =
+
+# Capturing request parameters is off by default. To enable the
+# capturing of request parameters, first ensure that the setting
+# "attributes.enabled" is set to "true" (the default value), and
+# then add "request.parameters.*" to the "attributes.include"
+# setting. For details about attributes configuration, please
+# consult the documentation.
+# attributes.include = request.parameters.*
+
+# The transaction tracer captures deep information about slow
+# transactions and sends this to the UI on a periodic basis. The
+# transaction tracer is enabled by default. Set this to "false"
+# to turn it off.
+transaction_tracer.enabled = true
+
+# Threshold in seconds for when to collect a transaction trace.
+# When the response time of a controller action exceeds this
+# threshold, a transaction trace will be recorded and sent to
+# the UI. Valid values are any positive float value, or (default)
+# "apdex_f", which will use the threshold for a dissatisfying
+# Apdex controller action - four times the Apdex T value.
+transaction_tracer.transaction_threshold = apdex_f
+
+# When the transaction tracer is on, SQL statements can
+# optionally be recorded. The recorder has three modes, "off"
+# which sends no SQL, "raw" which sends the SQL statement in its
+# original form, and "obfuscated", which strips out numeric and
+# string literals.
+transaction_tracer.record_sql = obfuscated
+
+# Threshold in seconds for when to collect stack trace for a SQL
+# call. In other words, when SQL statements exceed this
+# threshold, then capture and send to the UI the current stack
+# trace. This is helpful for pinpointing where long SQL calls
+# originate from in an application.
+transaction_tracer.stack_trace_threshold = 0.5
+
+# Determines whether the agent will capture query plans for slow
+# SQL queries. Only supported in MySQL and PostgreSQL. Set this
+# to "false" to turn it off.
+transaction_tracer.explain_enabled = true
+
+# Threshold for query execution time below which query plans
+# will not be captured. Relevant only when "explain_enabled"
+# is true.
+transaction_tracer.explain_threshold = 0.5
+
+# Space separated list of function or method names in form
+# 'module:function' or 'module:class.function' for which
+# additional function timing instrumentation will be added.
+transaction_tracer.function_trace =
+
+# The error collector captures information about uncaught
+# exceptions or logged exceptions and sends them to UI for
+# viewing. The error collector is enabled by default. Set this
+# to "false" to turn it off.
+error_collector.enabled = true
+
+# To stop specific errors from reporting to the UI, set this to
+# a space separated list of the Python exception type names to
+# ignore. The exception name should be of the form 'module:class'.
+error_collector.ignore_errors =
+
+# Browser monitoring is the Real User Monitoring feature of the UI.
+# For those Python web frameworks that are supported, this
+# setting enables the auto-insertion of the browser monitoring
+# JavaScript fragments.
+browser_monitoring.auto_instrument = true
+
+# A thread profiling session can be scheduled via the UI when
+# this option is enabled. The thread profiler will periodically
+# capture a snapshot of the call stack for each active thread in
+# the application to construct a statistically representative
+# call tree.
+thread_profiler.enabled = true
+
+# ---------------------------------------------------------------------------
+
+#
+# The application environments. These are specific settings which
+# override the common environment settings. The settings related to a
+# specific environment will be used when the environment argument to the
+# newrelic.agent.initialize() function has been defined to be either
+# "development", "test", "staging" or "production".
+#
+
+[newrelic:development]
+monitor_mode = true
+
+[newrelic:test]
+monitor_mode = true
+
+[newrelic:staging]
+app_name = monitoring (Staging)
+monitor_mode = true
+
+[newrelic:production]
+monitor_mode = true
+
+# ---------------------------------------------------------------------------
diff --git a/monitoring/prober/run_locally.sh b/monitoring/prober/run_locally.sh
@@ -30,6 +30,11 @@ for container_name in "${localhost_containers[@]}"; do
 	fi
 done
 
+echo "Re/Create prober_test_results.xml file"
+RESULTFILE="$(pwd)/monitoring/prober/prober_test_results.xml"
+# A truncating redirect creates the file if it is missing and empties it otherwise.
+: > "${RESULTFILE}"
+
 OUTPUT_DIR="monitoring/prober/output"
 mkdir -p "$OUTPUT_DIR"
-Original file line number
+Diff line change
@@ Expand Up / @@ -48,6 +48,7 @@ while true; do @@
           if [ $n_delays -gt $max_delays ]; then
             echo ""
             echo "Mock USS container ${MOCK_USS_CONTAINER} did not become healthy in a reasonable amount of time"
+            docker container inspect -f '{{.State.Health}}' "${MOCK_USS_CONTAINER}"
             exit 1
           fi
         fi
@@ Expand Down @@