diff --git a/deployment/clouddeploy/gke-workers/clouddeploy.yaml b/deployment/clouddeploy/gke-workers/clouddeploy.yaml index 8bbb701b77f..e9f00949cb5 100644 --- a/deployment/clouddeploy/gke-workers/clouddeploy.yaml +++ b/deployment/clouddeploy/gke-workers/clouddeploy.yaml @@ -9,6 +9,8 @@ serialPipeline: profiles: [ staging ] - targetId: production-workers profiles: [ prod ] + - targetId: private-workers + profiles: [ private ] --- apiVersion: deploy.cloud.google.com/v1 @@ -37,3 +39,17 @@ executionConfigs: - RENDER - DEPLOY serviceAccount: deployment@oss-vdb.iam.gserviceaccount.com +--- + +apiVersion: deploy.cloud.google.com/v1 +kind: Target +metadata: + name: private-workers +description: Private temporary worker cluster +gke: + cluster: projects/oss-vdb-test/locations/us-central1-f/clusters/workers-private +executionConfigs: +- usages: + - RENDER + - DEPLOY + serviceAccount: deployment@oss-vdb.iam.gserviceaccount.com diff --git a/deployment/clouddeploy/gke-workers/environments/private/backup.yaml b/deployment/clouddeploy/gke-workers/environments/private/backup.yaml new file mode 100644 index 00000000000..52e7cf8e4b4 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/backup.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: backup +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: cron + env: + - name: GOOGLE_CLOUD_PROJECT + value: "oss-vdb-test" + - name: DATASTORE_DATABASE_ID + value: "datastore-private" + - name: AFFECTED_COMMITS_BACKUP_BUCKET + value: "osv-test-affected-commits-private" \ No newline at end of file diff --git a/deployment/clouddeploy/gke-workers/environments/private/exporter.yaml b/deployment/clouddeploy/gke-workers/environments/private/exporter.yaml new file mode 100644 index 00000000000..63b8ca1530c --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/exporter.yaml @@ -0,0 +1,20 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: exporter +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: exporter + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + args: + - "--upload-to-gcs=true" + - "--workers=600" + - "--bucket=osv-test-vulnerabilities-private" + - "--osv-vulns-bucket=osv-test-vulnerabilities-private" + diff --git a/deployment/clouddeploy/gke-workers/environments/private/gitter.yaml b/deployment/clouddeploy/gke-workers/environments/private/gitter.yaml new file mode 100644 index 00000000000..578b22bd164 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/gitter.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gitter +spec: + template: + spec: + containers: + - name: gitter + args: + - "--port=8888" + - "--work-dir=/work/gitter" + - "--fetch-timeout=1h" + - "--repo-cache-ttl=1h" + - "--repo-cache-max-cost=100GiB" + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: gitter-disk-pv +spec: + csi: + driver: pd.csi.storage.gke.io + volumeHandle: projects/oss-vdb-test/zones/us-central1-f/disks/gitter-disk-private + fsType: ext4 diff --git a/deployment/clouddeploy/gke-workers/environments/private/importer-deleter.yaml b/deployment/clouddeploy/gke-workers/environments/private/importer-deleter.yaml new file mode 100644 index 00000000000..11c2ed8d63c --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/importer-deleter.yaml @@ -0,0 +1,22 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: importer-deleter +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: importer-deleter + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + - name: DATASTORE_DATABASE_ID + value: "datastore-private" + - name: WORKER_TASK_TOPIC + value: "private-tasks" + image: importer + args: + - --delete + - --delete-threshold-pct=2 diff --git a/deployment/clouddeploy/gke-workers/environments/private/importer-reconciler.yaml b/deployment/clouddeploy/gke-workers/environments/private/importer-reconciler.yaml new file mode 100644 index 00000000000..3dceeb4e5c8 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/importer-reconciler.yaml @@ -0,0 +1,33 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: importer-reconciler +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: importer + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + - name: DATASTORE_DATABASE_ID + value: "datastore-private" + - name: WORKER_TASK_TOPIC + value: "private-tasks" + # We don't have a dedicated reimport pool for this private deployment, + # so route reimports to the default pool. + - name: REIMPORT_TASK_POOL + value: "default" + args: ["--reconcile=true"] +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: importer-reconciler-git-cache-pv +spec: + csi: + driver: pd.csi.storage.gke.io + volumeHandle: projects/oss-vdb-test/zones/us-central1-f/disks/importer-reconciler-git-cache-private + fsType: ext4 diff --git a/deployment/clouddeploy/gke-workers/environments/private/importer.yaml b/deployment/clouddeploy/gke-workers/environments/private/importer.yaml new file mode 100644 index 00000000000..9adab2b252d --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/importer.yaml @@ -0,0 +1,27 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: importer +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: importer + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + - name: DATASTORE_DATABASE_ID + value: "datastore-private" + - name: WORKER_TASK_TOPIC + value: "private-tasks" + # We don't have a dedicated reimport pool for this private deployment, + # so route reimports to the default pool. + - name: REIMPORT_TASK_POOL + value: "default" + args: + # Note that with https://github.com/google/osv.dev/pull/2766 + # addition per-repository settings make this *really* take effect, see + # https://github.com/google/osv.dev/pull/2837 + - "--strict-validation" diff --git a/deployment/clouddeploy/gke-workers/environments/private/kustomization.yaml b/deployment/clouddeploy/gke-workers/environments/private/kustomization.yaml new file mode 100644 index 00000000000..63798c1c67e --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/kustomization.yaml @@ -0,0 +1,26 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +# - ../../base/backup.yaml # TODO(michaelkedar): figure out how to backup specific datastore IDs +- ../../base/exporter.yaml +- ../../base/gitter.yaml +- ../../base/importer.yaml +- ../../base/importer-deleter.yaml +- ../../base/importer-reconciler.yaml +- ../../base/record-checker.yaml +- ../../base/recoverer.yaml +- ../../base/workers.yaml +- ../../base/scaler.yaml + +patches: +# - path: backup.yaml +- path: exporter.yaml +- path: gitter.yaml +- path: importer.yaml +- path: importer-deleter.yaml +- path: importer-reconciler.yaml +- path: record-checker.yaml +- path: recoverer.yaml +- path: workers.yaml +- path: scaler.yaml diff --git a/deployment/clouddeploy/gke-workers/environments/private/record-checker.yaml b/deployment/clouddeploy/gke-workers/environments/private/record-checker.yaml new file mode 100644 index 00000000000..4b9aa121389 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/record-checker.yaml @@ -0,0 +1,20 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: record-checker +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: record-checker + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + - name: OSV_VULNERABILITIES_BUCKET + value: osv-test-vulnerabilities-private + - name: DATASTORE_DATABASE_ID + value: "datastore-private" + - name: FAILED_TASKS_TOPIC + value: "failed-private-tasks" diff --git a/deployment/clouddeploy/gke-workers/environments/private/recoverer.yaml b/deployment/clouddeploy/gke-workers/environments/private/recoverer.yaml new file mode 100644 index 00000000000..020c5fe4442 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/recoverer.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: recoverer +spec: + template: + spec: + containers: + - name: recoverer + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + - name: OSV_VULNERABILITIES_BUCKET + value: osv-test-vulnerabilities-private + - name: DATASTORE_DATABASE_ID + value: "datastore-private" + - name: FAILED_TASKS_SUBSCRIPTION + value: "private-recovery" + - name: WORKER_TASK_TOPIC + value: "private-tasks" diff --git a/deployment/clouddeploy/gke-workers/environments/private/scaler.yaml b/deployment/clouddeploy/gke-workers/environments/private/scaler.yaml new file mode 100644 index 00000000000..2245999c1b6 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/scaler.yaml @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: pubsub +spec: + # oss-vdb-test does not have local SSD quota to support 400 workers + maxReplicas: 250 diff --git a/deployment/clouddeploy/gke-workers/environments/private/workers.yaml b/deployment/clouddeploy/gke-workers/environments/private/workers.yaml new file mode 100644 index 00000000000..2abb62a80e7 --- /dev/null +++ b/deployment/clouddeploy/gke-workers/environments/private/workers.yaml @@ -0,0 +1,33 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: workers +spec: + template: + spec: + tolerations: + - key: workloadType + operator: Equal + value: worker-pool + nodeSelector: + workloadType: worker-pool + containers: + - name: worker-private + env: + - name: GOOGLE_CLOUD_PROJECT + value: oss-vdb-test + - name: OSV_VULNERABILITIES_BUCKET + value: osv-test-vulnerabilities-private + - name: DATASTORE_DATABASE_ID + value: "datastore-private" + - name: PUBSUB_SUBSCRIPTION + value: "private-default-pool" + - name: FAILED_TASKS_TOPIC + value: "failed-private-tasks" + resources: + requests: + cpu: "0.9" + memory: "1.2Gi" + limits: + cpu: "1.5" + memory: "1.3Gi" \ No newline at end of file diff --git a/deployment/clouddeploy/gke-workers/skaffold.yaml b/deployment/clouddeploy/gke-workers/skaffold.yaml index 96d87ac708f..2e28a0f7ea4 100644 --- a/deployment/clouddeploy/gke-workers/skaffold.yaml +++ b/deployment/clouddeploy/gke-workers/skaffold.yaml @@ -15,5 +15,11 @@ profiles: paths: - environments/oss-vdb +- name: private + manifests: + kustomize: + paths: + - environments/private + deploy: kubectl: {} diff --git a/gcp/workers/cron/backup/backup.py b/gcp/workers/cron/backup/backup.py index 2f8c5eaa7d4..8ac954b210f 100644 --- a/gcp/workers/cron/backup/backup.py +++ b/gcp/workers/cron/backup/backup.py @@ -14,6 +14,7 @@ # limitations under the License. """Datastore backup.""" +import logging import os import sys from google.cloud import ndb @@ -27,24 +28,34 @@ def main(): """Create a Datastore backup.""" client = ds_admin.DatastoreAdminClient() - backup_bucket = os.environ['BACKUP_BUCKET'] - affected_commits_backup_bucket = os.environ['AFFECTED_COMMITS_BACKUP_BUCKET'] project_id = os.environ['GOOGLE_CLOUD_PROJECT'] - client.export_entities( - project_id=project_id, output_url_prefix=f'gs://{backup_bucket}') - entity_filter = datastore_admin_v1.EntityFilter() - entity_filter.kinds = ['AffectedCommits'] - client.export_entities( - project_id=project_id, - output_url_prefix=f'gs://{affected_commits_backup_bucket}', - entity_filter=entity_filter) + backup_bucket = os.environ.get('BACKUP_BUCKET') + if backup_bucket: + logging.info('Exporting all entities to gs://%s', backup_bucket) + client.export_entities( + project_id=project_id, output_url_prefix=f'gs://{backup_bucket}') + + affected_commits_backup_bucket = os.environ.get( + 'AFFECTED_COMMITS_BACKUP_BUCKET') + if affected_commits_backup_bucket: + logging.info('Exporting AffectedCommits to gs://%s', + affected_commits_backup_bucket) + entity_filter = datastore_admin_v1.EntityFilter() + entity_filter.kinds = ['AffectedCommits'] + client.export_entities( + project_id=project_id, + output_url_prefix=f'gs://{affected_commits_backup_bucket}', + entity_filter=entity_filter) return 0 if __name__ == '__main__': - _ndb_client = ndb.Client() + database_id = os.getenv('DATASTORE_DATABASE_ID') + if not database_id: + database_id = None + _ndb_client = ndb.Client(database=database_id) osv.logs.setup_gcp_logging('backup') with _ndb_client.context(): sys.exit(main()) diff --git a/gcp/workers/recoverer/recoverer.py b/gcp/workers/recoverer/recoverer.py index 4e335eb9d14..11a315daa5c 100644 --- a/gcp/workers/recoverer/recoverer.py +++ b/gcp/workers/recoverer/recoverer.py @@ -32,8 +32,8 @@ import osv.sources from osv.logs import setup_gcp_logging -_FAILED_TASKS_SUBSCRIPTION = 'recovery' -_TASKS_TOPIC = 'tasks' +_FAILED_TASKS_SUBSCRIPTION = os.getenv('FAILED_TASKS_SUBSCRIPTION', 'recovery') +_TASKS_TOPIC = os.getenv('WORKER_TASK_TOPIC', 'tasks') _ndb_client = None _storage_client = None @@ -43,8 +43,11 @@ def ndb_client(): """Get the ndb client. Lazily initialized to allow testing with datastore emulator.""" global _ndb_client + database_id = os.getenv('DATASTORE_DATABASE_ID') + if not database_id: + database_id = None if _ndb_client is None: - _ndb_client = ndb.Client() + _ndb_client = ndb.Client(database=database_id) return _ndb_client diff --git a/go/cmd/importer/main.go b/go/cmd/importer/main.go index 81b5fc0ed42..a3d215de816 100644 --- a/go/cmd/importer/main.go +++ b/go/cmd/importer/main.go @@ -89,7 +89,8 @@ func main() { logger.FatalContext(ctx, "Forced shut down after 30 seconds") }() - datastoreClient, err := datastore.NewClient(ctx, project) + datastoreID := os.Getenv("DATASTORE_DATABASE_ID") // empty string is the default database, which is what we want. + datastoreClient, err := datastore.NewClientWithDatabase(ctx, project, datastoreID) if err != nil { logger.FatalContext(ctx, "Failed to create datastore client", slog.Any("error", err)) } diff --git a/go/cmd/recordchecker/recordchecker.go b/go/cmd/recordchecker/recordchecker.go index d52075f79f9..c48f22c0616 100644 --- a/go/cmd/recordchecker/recordchecker.go +++ b/go/cmd/recordchecker/recordchecker.go @@ -22,7 +22,7 @@ import ( ) const ( - pubsubTopic = "failed-tasks" + defaultPubsubTopic = "failed-tasks" // defaultNumWorkers is the default number of concurrent workers to use. // This can be overridden by setting the NUM_WORKERS environment variable. defaultNumWorkers = 50 @@ -228,7 +228,8 @@ func setup(ctx context.Context) (*appEnv, error) { } gcsClient := clients.NewGCSClient(storageClient, bucketName) - dsClient, err := datastore.NewClient(ctx, projectID) + datastoreID := os.Getenv("DATASTORE_DATABASE_ID") // empty string is the default database, which is what we want. + dsClient, err := datastore.NewClientWithDatabase(ctx, projectID, datastoreID) if err != nil { gcsClient.Close() err = fmt.Errorf("failed to create datastore client: %w", err) @@ -244,6 +245,10 @@ func setup(ctx context.Context) (*appEnv, error) { return nil, err } + pubsubTopic := os.Getenv("FAILED_TASKS_TOPIC") + if pubsubTopic == "" { + pubsubTopic = defaultPubsubTopic + } publisher := pubsubClient.Publisher(pubsubTopic) numWorkers := defaultNumWorkers diff --git a/go/cmd/relations/relations.go b/go/cmd/relations/relations.go index 9386a6b0f82..06bff881798 100644 --- a/go/cmd/relations/relations.go +++ b/go/cmd/relations/relations.go @@ -98,7 +98,8 @@ func setupClients(ctx context.Context) (gClients, error) { } // Initialize clients - dsClient, err := datastore.NewClient(ctx, projectID, option.WithTelemetryDisabled()) + datastoreID := os.Getenv("DATASTORE_DATABASE_ID") // empty string is the default database, which is what we want. + dsClient, err := datastore.NewClientWithDatabase(ctx, projectID, datastoreID, option.WithTelemetryDisabled()) if err != nil { return gClients{}, fmt.Errorf("failed to create datastore client: %w", err) }