diff --git a/mddocs/Makefile b/mddocs/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/mddocs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/mddocs/_static/architecture.png b/mddocs/_static/architecture.png new file mode 100644 index 00000000..4bea0bc5 Binary files /dev/null and b/mddocs/_static/architecture.png differ diff --git a/mddocs/_static/custom.css b/mddocs/_static/custom.css new file mode 100644 index 00000000..ac17ca7e --- /dev/null +++ b/mddocs/_static/custom.css @@ -0,0 +1,3 @@ +.logo { + width: 200px !important; +} diff --git a/mddocs/_static/openapi.json b/mddocs/_static/openapi.json new file mode 100644 index 00000000..7b3bf552 --- /dev/null +++ b/mddocs/_static/openapi.json @@ -0,0 +1,9 @@ +{ + "openapi": "3.1.0", + "version": "unknown", + "info": { + "title": "Generated in CI", + "version": "unknown" + }, + "paths": {} +} \ No newline at end of file diff --git a/mddocs/_static/redoc.html b/mddocs/_static/redoc.html new file mode 100644 index 00000000..b9f59a07 --- /dev/null +++ b/mddocs/_static/redoc.html @@ -0,0 +1,28 @@ + + + + + SyncMaster - ReDoc + + + + + + + + + + + + + + diff --git a/mddocs/_static/swagger.html b/mddocs/_static/swagger.html new file mode 100644 index 00000000..ef34cf30 --- /dev/null +++ b/mddocs/_static/swagger.html @@ -0,0 +1,26 @@ + + + + + + + SwaggerUI + + + + +
+ + + + diff --git a/mddocs/changelog.md b/mddocs/changelog.md new file mode 100644 index 00000000..e74b51fd --- /dev/null +++ b/mddocs/changelog.md @@ -0,0 +1,13 @@ +# Changelog { #changelog } + +- [0.2.5 [UNRELEASED] (2025-09-30)][DRAFT] +- [0.2.4 (2025-07-09)][0.2.4] +- [0.2.3 (2025-04-11)][0.2.3] +- [0.2.2 (2025-04-11)][0.2.2] +- [0.2.1 (2025-04-07)][0.2.1] +- [0.2.0 (2025-04-04)][0.2.0] +- [0.1.5 (2024-04-22)][0.1.5] +- [0.1.4 (2024-04-15)][0.1.4] +- [0.1.3 (2024-04-15)][0.1.3] +- [0.1.2 (2024-04-15)][0.1.2] +- [0.1.1 (2024-03-29)][0.1.1] diff --git a/mddocs/changelog/0.1.1.md b/mddocs/changelog/0.1.1.md new file mode 100644 index 00000000..070d02ea --- /dev/null +++ b/mddocs/changelog/0.1.1.md @@ -0,0 +1,3 @@ +# 0.1.1 (2024-03-29) { #0.1.1 } + +SyncMaster is now open source! diff --git a/mddocs/changelog/0.1.2.md b/mddocs/changelog/0.1.2.md new file mode 100644 index 00000000..c82a1c6b --- /dev/null +++ b/mddocs/changelog/0.1.2.md @@ -0,0 +1,12 @@ +# 0.1.2 (2024-04-15) { #0.1.2 } + +## Features + +- Rename `syncmaster.backend.main` module to `syncmaster.backend`. It now also accepts the same args as `uvicorn`. ([#35](https://github.com/MobileTeleSystems/syncmaster/issues/35)) +- Add `syncmaster.db.migrations` module to run `alembic` with proper config. ([#35](https://github.com/MobileTeleSystems/syncmaster/issues/35)) +- Change backend and worker images to use entrypoint. ([#35](https://github.com/MobileTeleSystems/syncmaster/issues/35)) + +## Improvements + +- Run database migrations in the entrypoint of backend image, before starting the backend server. ([#35](https://github.com/MobileTeleSystems/syncmaster/issues/35)) +- Add healthchecks to example `docker-compose.yml`. 
([#35](https://github.com/MobileTeleSystems/syncmaster/issues/35)) diff --git a/mddocs/changelog/0.1.3.md b/mddocs/changelog/0.1.3.md new file mode 100644 index 00000000..174506ea --- /dev/null +++ b/mddocs/changelog/0.1.3.md @@ -0,0 +1,5 @@ +# 0.1.3 (2024-04-15) { #0.1.3 } + +## Bug Fixes + +Fix backend image entrypoint. diff --git a/mddocs/changelog/0.1.4.md b/mddocs/changelog/0.1.4.md new file mode 100644 index 00000000..7837d9cc --- /dev/null +++ b/mddocs/changelog/0.1.4.md @@ -0,0 +1,6 @@ +# 0.1.4 (2024-04-15) { #0.1.4 } + +## Bug Fixes + +- Fix missing backend factory for uvicorn. +- Fix missing `kinit` executable in worker image. diff --git a/mddocs/changelog/0.1.5.md b/mddocs/changelog/0.1.5.md new file mode 100644 index 00000000..7c67f997 --- /dev/null +++ b/mddocs/changelog/0.1.5.md @@ -0,0 +1,17 @@ +# 0.1.5 (2024-04-22) { #0.1.5 } + +## Breaking Changes + +- Pass current `Run` to `CREATE_SPARK_SESSION_FUNCTION`. This allows using run/transfer/group information for Spark session options, + like `appName` or custom ones. ([#38](https://github.com/MobileTeleSystems/syncmaster/issues/38)) + +## Improvements + +- Reduce backend image size ([#44](https://github.com/MobileTeleSystems/syncmaster/issues/44)) + +## Bug Fixes + +- Fix 500 error in case of `PATCH v1/connections/:id` request with passed `auth_data.password` field value ([#39](https://github.com/MobileTeleSystems/syncmaster/issues/39)) +- Do not use `asyncio.gather` with SQLAlchemy requests ([#40](https://github.com/MobileTeleSystems/syncmaster/issues/40)) +- Fix 500 error while creating HDFS connection ([#41](https://github.com/MobileTeleSystems/syncmaster/issues/41)) +- Fix missing `options` field from Transfer params with `hdfs` and `s3` type diff --git a/mddocs/changelog/0.2.0.md b/mddocs/changelog/0.2.0.md new file mode 100644 index 00000000..d0c30cce --- /dev/null +++ b/mddocs/changelog/0.2.0.md @@ -0,0 +1,69 @@ +# 0.2.0 (2025-04-04) { #0.2.0 } + +## TL;DR + +- Completely new UI. 
+- Add support for FileSystem connections. +- Add support for simple transformations within transfer - filter files, filter rows, change columns. +- Add support for incremental read strategy. +- Add support for running transfers by schedule. +- Add support for changing SparkSession resource limits. + +Huge thanks to [Dmitry Pedchenko](https://github.com/dmitry-pedchenko), [Maxim Lixakov](https://github.com/maxim-lixakov), [Ilyas Gasanov](https://github.com/IlyasDevelopment), [Kirill Chernikov](https://github.com/Zabilsya). + +## Breaking Changes + +- Implement a single error handling format to improve consistency ([#95](https://github.com/MobileTeleSystems/syncmaster/issues/95)) +- Change response format for `GET /v1/groups` - add **current user role** for each group ([#97](https://github.com/MobileTeleSystems/syncmaster/issues/97)) +- Change response format for `GET /v1/groups/:id` - add **current user role** for group ([#109](https://github.com/MobileTeleSystems/syncmaster/issues/109)) +- Now migrations are executed in a dedicated one-off container, instead of being run as a part of `backend` container. ([#163](https://github.com/MobileTeleSystems/syncmaster/issues/163)) +- Delete **transfers**, **queues** and **groups** records instead of marking them as deleted ([#168](https://github.com/MobileTeleSystems/syncmaster/issues/168)) +- Move the `type` field from nested `connection_data` field to the root level of the `Connection` response. ([#169](https://github.com/MobileTeleSystems/syncmaster/issues/169)) +- Decouple `auth_data.type` from the connection type (e.g. `postgres`) and link it to the authentication type (e.g. `basic`). 
([#169](https://github.com/MobileTeleSystems/syncmaster/issues/169)) +- Add deletion of **connections** records instead of marking them as deleted ([#170](https://github.com/MobileTeleSystems/syncmaster/issues/170)) +- Use PUT instead of PATCH for `Connection` and `Transfer` models ([#215](https://github.com/MobileTeleSystems/syncmaster/issues/215)) +- Return new connection and transfer object in `POST /v1/transfer/copy` and `POST /v1/connection/copy` endpoints. +- Change response status from 200 to 204 for all `DELETE` endpoints. + +## Features + +- Add hive to known types ([#67](https://github.com/MobileTeleSystems/syncmaster/issues/67)) +- Allow search for **groups**, **users**, **connections**, **transfers**, **queues** ([#92](https://github.com/MobileTeleSystems/syncmaster/issues/92), [#94](https://github.com/MobileTeleSystems/syncmaster/issues/94), [#99](https://github.com/MobileTeleSystems/syncmaster/issues/99), [#100](https://github.com/MobileTeleSystems/syncmaster/issues/100), [#101](https://github.com/MobileTeleSystems/syncmaster/issues/101), [#103](https://github.com/MobileTeleSystems/syncmaster/issues/103)) +- Add filters for **connections**, **transfers** and **runs** ([#94](https://github.com/MobileTeleSystems/syncmaster/issues/94), [#102](https://github.com/MobileTeleSystems/syncmaster/issues/102), [#106](https://github.com/MobileTeleSystems/syncmaster/issues/106)) +- Implement a scheduler to run celery tasks on a schedule. This can be done by setting `Transfer.is_scheduled=True` and `Transfer.schedule="..."` (cron-like expression). + The Run model now has a `type` field with options `MANUAL` and `SCHEDULED`. 
([#114](https://github.com/MobileTeleSystems/syncmaster/issues/114)) +- Add GET `v1/monitoring/metrics` endpoint to provide basic HTTP server metrics in Prometheus format ([#121](https://github.com/MobileTeleSystems/syncmaster/issues/121)) +- Implemented `KeycloakAuthProvider` for Single Sign-On (SSO) authentication.([#123](https://github.com/MobileTeleSystems/syncmaster/issues/123)) +- Implemented `DummyAuthProvider` for development and testing environments. ([#123](https://github.com/MobileTeleSystems/syncmaster/issues/123)) +- Add API schemas for new DB sources - Clickhouse, MSSQL, MySQL ([#124](https://github.com/MobileTeleSystems/syncmaster/issues/124), [#125](https://github.com/MobileTeleSystems/syncmaster/issues/125), [#126](https://github.com/MobileTeleSystems/syncmaster/issues/126), [#160](https://github.com/MobileTeleSystems/syncmaster/issues/160)) +- Add logic for handling FTP, FTPS, SFTP, Samba, WebDAV transfers ([#189](https://github.com/MobileTeleSystems/syncmaster/issues/189), [#191](https://github.com/MobileTeleSystems/syncmaster/issues/191), [#192](https://github.com/MobileTeleSystems/syncmaster/issues/192), [#194](https://github.com/MobileTeleSystems/syncmaster/issues/194)) +- Add API schemas for file sources - SFTP, FTP, FTPS, WebDAV, Samba ([#187](https://github.com/MobileTeleSystems/syncmaster/issues/187)) +- Add API schemas for file formats - Excel, XML, ORC, Parquet ([#140](https://github.com/MobileTeleSystems/syncmaster/issues/140), [#142](https://github.com/MobileTeleSystems/syncmaster/issues/142), [#143](https://github.com/MobileTeleSystems/syncmaster/issues/143), [#144](https://github.com/MobileTeleSystems/syncmaster/issues/144)) +- Add compression options to file formats CSV, JSON, JSONLine, Excel, ORC, Parquet, XML ([#159](https://github.com/MobileTeleSystems/syncmaster/issues/159), [#161](https://github.com/MobileTeleSystems/syncmaster/issues/161)) +- Add transformations for **Transfers** with dataframe row filtering 
([#184](https://github.com/MobileTeleSystems/syncmaster/issues/184)) +- Add transformations for **Transfers** with dataframe column filtering ([#186](https://github.com/MobileTeleSystems/syncmaster/issues/186)) +- Add transformations for **Transfers** with file filtering ([#198](https://github.com/MobileTeleSystems/syncmaster/issues/198)) +- Add `increment_by` field to `strategy_params` ([#202](https://github.com/MobileTeleSystems/syncmaster/issues/202)) +- Implement increment strategy for transfers with file sources ([#209](https://github.com/MobileTeleSystems/syncmaster/issues/209)) +- Implement increment strategy for transfers with database sources ([#211](https://github.com/MobileTeleSystems/syncmaster/issues/211)) +- Add `resources` field to `Transfer`. ([#214](https://github.com/MobileTeleSystems/syncmaster/issues/214)) +- Add `file_name_template` field to `target_params` ([#196](https://github.com/MobileTeleSystems/syncmaster/issues/196), [#201](https://github.com/MobileTeleSystems/syncmaster/issues/201)) + +## Improvements + +- Updated `User` model to include `email`, `first_name`, `middle_name`, and `last_name` fields, all optional. ([#123](https://github.com/MobileTeleSystems/syncmaster/issues/123)) +- Read env variable `SYNCMASTER__ENTRYPOINT__SUPERUSERS` to promote users to `SUPERUSER` role during server startup. ([#137](https://github.com/MobileTeleSystems/syncmaster/issues/137)) +- Enabled dynamic selection of authentication provider via environment variable `SYNCMASTER__AUTH__PROVIDER`. ([#123](https://github.com/MobileTeleSystems/syncmaster/issues/123)) +- Enable parallel reading from JDBC sources. ([#219](https://github.com/MobileTeleSystems/syncmaster/issues/219)) +- Reset HWM when changing strategy from `incremental` to `full`. 
([#217](https://github.com/MobileTeleSystems/syncmaster/issues/217)) +- Grant read-only permissions for the previous group owner when ownership is transferred ([#135](https://github.com/MobileTeleSystems/syncmaster/issues/135)) + +## Bug Fixes + +- Use Hadoop AWS `magic` committer only if transfer *target* is S3. ([#46](https://github.com/MobileTeleSystems/syncmaster/issues/46)) +- Check that `service_name` and `sid` are mutually exclusive when editing Oracle connection. ([#52](https://github.com/MobileTeleSystems/syncmaster/issues/52)) +- Queue name is unique within a group, new field `slug` is globally-unique. ([#54](https://github.com/MobileTeleSystems/syncmaster/issues/54), [#119](https://github.com/MobileTeleSystems/syncmaster/issues/119)) +- Prohibit updating connection type it if there is a transfer associated with this connection. ([#55](https://github.com/MobileTeleSystems/syncmaster/issues/55)) +- Fix error when `is_scheduled` field value was ignored. ([#57](https://github.com/MobileTeleSystems/syncmaster/issues/57)) +- Group without any users assigned was missing in groups list. ([#62](https://github.com/MobileTeleSystems/syncmaster/issues/62)) +- Dump connection credentials while starting a transfer. ([#63](https://github.com/MobileTeleSystems/syncmaster/issues/63)) diff --git a/mddocs/changelog/0.2.1.md b/mddocs/changelog/0.2.1.md new file mode 100644 index 00000000..9f922472 --- /dev/null +++ b/mddocs/changelog/0.2.1.md @@ -0,0 +1,11 @@ +# 0.2.1 (2025-04-07) { #0.2.1 } + +## Improvements + +- Change docker image user from `root` to `syncmaster`, to improve security. +- Move server healthcheck to Docker image. +- SBOM file is generated on release. + +## Bug fixes + +- Fix missing Swagger docs in prod image. 
diff --git a/mddocs/changelog/0.2.2.md b/mddocs/changelog/0.2.2.md new file mode 100644 index 00000000..8ad82e6d --- /dev/null +++ b/mddocs/changelog/0.2.2.md @@ -0,0 +1,19 @@ +# 0.2.2 (2025-04-11) { #0.2.2 } + +## Breaking + +- Use `PUT /v1/groups/:id` instead of `PATCH /v1/groups/:id`. +- Use `PUT /v1/groups/:id/users/:id` instead of `PATCH /v1/groups/:id/users/:id`. +- Use `PUT /v1/queues/:id` instead of `PATCH /v1/queues/:id`. +- Now allowed names length should be in 3..128 symbols range, not 1..inf. + +## Improvements + +- Now queue name can include any ASCII printable characters. +- Queue slug is always lowercase. Spaces, hyphens and underscores are replaced with `-` symbol. + +## Bug fixes + +- Call `kinit` before starting Spark session connecting to `Hive` cluster. ([#225](https://github.com/MobileTeleSystems/syncmaster/issues/225)) +- Fix `HDFS` connection was trying to use anonymous auth instead of user/password. ([#225](https://github.com/MobileTeleSystems/syncmaster/issues/225)) +- Fix updating queue ignored name and didn’t reset description. diff --git a/mddocs/changelog/0.2.3.md b/mddocs/changelog/0.2.3.md new file mode 100644 index 00000000..57354dc0 --- /dev/null +++ b/mddocs/changelog/0.2.3.md @@ -0,0 +1,9 @@ +# 0.2.3 (2025-04-11) { #0.2.3 } + +## Bug fixes + +- Fix Worker not updating Run `status` and `ended_at` fields after executing a very long ETL process. + +## Improvements + +- Change Celery log level from DEBUG to INFO. diff --git a/mddocs/changelog/0.2.4.md b/mddocs/changelog/0.2.4.md new file mode 100644 index 00000000..307b2d54 --- /dev/null +++ b/mddocs/changelog/0.2.4.md @@ -0,0 +1,5 @@ +# 0.2.4 (2025-07-09) { #0.2.4 } + +## Improvements + +Include all required jars from Maven to worker image. This increases image size, but drastically reduces time of Spark session startup. 
diff --git a/mddocs/changelog/0.2.5.md b/mddocs/changelog/0.2.5.md new file mode 100644 index 00000000..07079167 --- /dev/null +++ b/mddocs/changelog/0.2.5.md @@ -0,0 +1,17 @@ +# 0.2.5 (2025-10-10) { #0.2.5 } + +## Features + +- Implement Keycloak login page on frontend. ([#128](https://github.com/MTSWebServices/syncmaster-ui/pull/128)) +- Implement ``GET /v1/auth/logout`` endpoint for ``KeycloakAuthProvider``. ([#275](https://github.com/MobileTeleSystems/syncmaster/issues/275)) + +## Improvements + +- Improved full-text search for technical fields such as hostnames, table names, and directory paths. ([#255](https://github.com/MobileTeleSystems/syncmaster/issues/255)) +- Replace 307 redirect to Keycloak auth page with 401 response, due to browser restrictions for redirect + CORS + localhost. ([#274](https://github.com/MobileTeleSystems/syncmaster/issues/274)) + +## Bug Fixes + +- Replace sync methods of Keycloak client with async ones. ([#177](https://github.com/MobileTeleSystems/syncmaster/issues/177)) + + Previously interaction with Keycloak could block asyncio event loop. diff --git a/mddocs/changelog/0.3.0.md b/mddocs/changelog/0.3.0.md new file mode 100644 index 00000000..aa8ff3bd --- /dev/null +++ b/mddocs/changelog/0.3.0.md @@ -0,0 +1,101 @@ +# 0.3.0 (2025-12-18) { #0.3.0 } + +Release of Data.SyncMaster 0.3.0 brings up support for Iceberg, Spark-on-K8s and Spark-on-Yarn. + +!!! note + Currently Spark-on-K8s and Spark-on-Yarn do not support FTP, FTPS, SFTP, Samba and WebDAV. + +## Breaking Changes + +- Worker container command should be changed from `--queues 123-myqueue` to `worker --queues 123-myqueue` ([#295](https://github.com/MobileTeleSystems/syncmaster/issues/295)). + +- Application should be configured via `config.yml` file ([#289](https://github.com/MobileTeleSystems/syncmaster/issues/289)). + + It's still possible to use environment variables instead. But it is not recommended for security reasons, as docker/k8s envs can be read by other users. 
+ + Other notable changes: + +- Environment variable `SYNCMASTER__ENTRYPOINT__SUPERUSERS` is renamed to `SYNCMASTER__SUPERUSERS`. +- Logging format is configured explicitly via `config.yml` instead of having few predefined configuration files. + +- Moved `server.session` middleware settings to `auth` block ([#304](https://github.com/MobileTeleSystems/syncmaster/issues/304)). + Also rename some fields in `auth.keycloak` settings block. + +Before vs after + +Before: + +```yaml +auth: + provider: ... + keycloak: + server_url: ... + redirect_url: ... + +server: + session: + enabled: true + secret_key: ... +``` + +Now: + +```yaml +auth: + provider: + keycloak: + api_url: ... + ui_callback_url: ... + cookie: + secret_key: ... +``` + +## Features + +- Added Iceberg support (([#282](https://github.com/MobileTeleSystems/syncmaster/issues/282)), ([#284](https://github.com/MobileTeleSystems/syncmaster/issues/284)), ([#294](https://github.com/MobileTeleSystems/syncmaster/issues/294)), ([#297](https://github.com/MobileTeleSystems/syncmaster/issues/297))). + + Iceberg connection currently supports only Iceberg REST Catalog with S3 warehouse. + +- Allow using SyncMaster worker image as `spark.kubernetes.container.image`. ([#295](https://github.com/MobileTeleSystems/syncmaster/issues/295)) + +- Allow passing default Spark session config via worker settings ([#291](https://github.com/MobileTeleSystems/syncmaster/issues/291)): + +### Example config.yml + +```yaml +config.yml: +worker: + spark_session_default_config: + spark.master: local + spark.driver.host: 127.0.0.1 + spark.driver.bindAddress: 0.0.0.0 + spark.sql.pyspark.jvmStacktrace.enabled: true + spark.ui.enabled: false +``` + +- Added OAuth2GatewayProvider ([#283](https://github.com/MobileTeleSystems/syncmaster/issues/283)). + + This allows using Data.SyncMaster under OAuth2 Gateway. Implementation is similar to DummyAuthProvider. + +- Allow disabling `SessionMiddleware`, as it only required by `KeycloakAuthProvider`. 
+ +- Add hooks support to worker classes (TransferController, Handler) ([#279](https://github.com/MobileTeleSystems/syncmaster/issues/279)). + +- Pass transfer name and group name to Handlers ([#308](https://github.com/MobileTeleSystems/syncmaster/issues/308)). + +## Improvements + +- Make S3 connection `region` a mandatory option, to prevent possible errors. +- Hide `database_name` from Clickhouse and MySQL connection pages. +- Frontend: add placeholders to connection params, like host, port and so on. +- Sync frontend and backend checks for some field patterns, e.g. table name should be in format `schema.table`. +- Improve OpenAPI schema fields description. + +## Bug Fixes + +Fix some file format options were ignored by SyncMaster worker: + +- XML: `root_tag`, `row_tag` +- Excel `start_cell`, `include_header` +- CSV `include_header`, `line_sep` +- JSON, JSONLine: `line_sep` diff --git a/mddocs/changelog/0.3.1.md b/mddocs/changelog/0.3.1.md new file mode 100644 index 00000000..f8adfb17 --- /dev/null +++ b/mddocs/changelog/0.3.1.md @@ -0,0 +1,14 @@ +# 0.3.1 (2026-01-15) { #0.3.1 } + +## Improvements + +- Added table name validation - must be in format `schema.table` or `namespace1.namespace2.table`. +- Cut 400MB of worker images size by removing copies of `.jar` files. +- Explicitly log transfer exception instead of `raised unexpected: UnpickleableExceptionWrapper` by celery. + +## Misc + +- Switch from `poetry` to `uv` for dependency management. +- Switch from `black` to `ruff format`. +- Switch from `flake8+wemake-python-styleguide` to `ruff check`, and fix found code smells. +- Dependency updates. 
diff --git a/mddocs/changelog/0.3.2.md b/mddocs/changelog/0.3.2.md new file mode 100644 index 00000000..914962e3 --- /dev/null +++ b/mddocs/changelog/0.3.2.md @@ -0,0 +1,5 @@ +# 0.3.2 (2026-03-05) { #0.3.2 } + +## Features + +Implement SQL transformations for transfer (([#327](https://github.com/MobileTeleSystems/syncmaster/issues/327)), ([#330](https://github.com/MobileTeleSystems/syncmaster/issues/330))) diff --git a/mddocs/changelog/DRAFT.md b/mddocs/changelog/DRAFT.md new file mode 100644 index 00000000..8a6b3a31 --- /dev/null +++ b/mddocs/changelog/DRAFT.md @@ -0,0 +1 @@ +# DRAFT { #DRAFT } diff --git a/mddocs/changelog/NEXT_RELEASE.md b/mddocs/changelog/NEXT_RELEASE.md new file mode 100644 index 00000000..a9831f9d --- /dev/null +++ b/mddocs/changelog/NEXT_RELEASE.md @@ -0,0 +1 @@ +% towncrier release notes start diff --git a/mddocs/changelog/index.md b/mddocs/changelog/index.md new file mode 100644 index 00000000..e61940e9 --- /dev/null +++ b/mddocs/changelog/index.md @@ -0,0 +1,13 @@ +# Changelog { #changelog } + +* [0.2.5 [UNRELEASED] (2025-09-30)][DRAFT] +* [0.2.4 (2025-07-09)][0.2.4] +* [0.2.3 (2025-04-11)][0.2.3] +* [0.2.2 (2025-04-11)][0.2.2] +* [0.2.1 (2025-04-07)][0.2.1] +* [0.2.0 (2025-04-04)][0.2.0] +* [0.1.5 (2024-04-22)][0.1.5] +* [0.1.4 (2024-04-15)][0.1.4] +* [0.1.3 (2024-04-15)][0.1.3] +* [0.1.2 (2024-04-15)][0.1.2] +* [0.1.1 (2024-03-29)][0.1.1] diff --git a/mddocs/changelog/next_release/.keep b/mddocs/changelog/next_release/.keep new file mode 100644 index 00000000..e69de29b diff --git a/mddocs/changelog/next_release/255.improvement.md b/mddocs/changelog/next_release/255.improvement.md new file mode 100644 index 00000000..7150ca0b --- /dev/null +++ b/mddocs/changelog/next_release/255.improvement.md @@ -0,0 +1,2 @@ +Improved full-text search by refining `tsvector` generation and adding better tokenization for technical fields such as hostnames, table names, and directory paths. 
+-- by {github:user}`marashka` diff --git a/mddocs/conf.py b/mddocs/conf.py new file mode 100644 index 00000000..bd462dc2 --- /dev/null +++ b/mddocs/conf.py @@ -0,0 +1,163 @@ +# SPDX-FileCopyrightText: 2025-present MTS PJSC +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: 2023-2024 MTS PJSC +# SPDX-License-Identifier: Apache-2.0 +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. + + +import os +import sys +from pathlib import Path + +from packaging import version as Version + +PROJECT_ROOT_DIR = Path(__file__).parent.parent.resolve() + +sys.path.insert(0, os.fspath(PROJECT_ROOT_DIR)) + +# -- Project information ----------------------------------------------------- + +project = "syncmaster" +copyright = "2023-2024 MTS PJSC" +author = "DataOps.ETL" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. + +# this value is updated automatically by `poetry version ...` and poetry-bumpversion plugin +ver = Version.parse("0.2.5") +version = ver.base_version +# The full version, including alpha/beta/rc tags. +release = ver.public + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + "numpydoc", + "sphinx_copybutton", + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinxcontrib.autodoc_pydantic", + "sphinxcontrib.towncrier", # provides `towncrier-draft-entries` directive + "sphinx_issues", + "sphinx_design", # provides `dropdown` directive + "sphinxcontrib.plantuml", + "sphinx_favicon", + "sphinxarg.ext", + "sphinx_last_updated_by_git", +] + +swagger = [ + { + "name": "SyncMaster REST API", + "page": "openapi", + "id": "syncmaster-api", + "options": { + "url": "_static/openapi.json", + }, + }, +] + +numpydoc_show_class_members = True +autodoc_pydantic_model_show_config = False +autodoc_pydantic_model_show_config_summary = False +autodoc_pydantic_model_show_config_member = False +autodoc_pydantic_model_show_json = False +autodoc_pydantic_model_show_validator_summary = False +autodoc_pydantic_model_show_validator_members = False +autodoc_pydantic_model_member_order = "bysource" +autodoc_pydantic_settings_show_config = False +autodoc_pydantic_settings_show_config_summary = True +autodoc_pydantic_settings_show_config_member = False +autodoc_pydantic_settings_show_json = False +autodoc_pydantic_settings_show_validator_summary = False +autodoc_pydantic_settings_show_validator_members = False +autodoc_pydantic_settings_member_order = "bysource" +autodoc_pydantic_field_list_validators = False +sphinx_tabs_disable_tab_closing = True + +# prevent >>>, ... and doctest outputs from copying +copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " +copybutton_prompt_is_regexp = True +copybutton_copy_empty_lines = False +copybutton_only_copy_prompt_lines = True + +towncrier_draft_autoversion_mode = "draft" +towncrier_draft_include_empty = False +towncrier_draft_working_directory = PROJECT_ROOT_DIR + +# Add any paths that contain templates here, relative to this directory. 
+templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. + +html_theme = "furo" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] +html_extra_path = ["robots.txt"] +html_css_files = [ + "custom.css", +] + +# The master toctree document. +master_doc = "index" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. 
+htmlhelp_basename = "syncmaster-doc" + + +# which is the equivalent to: +issues_uri = "https://github.com/MobileTeleSystems/syncmaster/issues/{issue}" +issues_pr_uri = "https://github.com/MobileTeleSystems/syncmaster/pulls/{pr}" +issues_commit_uri = "https://github.com/MobileTeleSystems/syncmaster/commit/{commit}" +issues_user_uri = "https://github.com/{user}" diff --git a/mddocs/contributing.md b/mddocs/contributing.md new file mode 100644 index 00000000..9239f19b --- /dev/null +++ b/mddocs/contributing.md @@ -0,0 +1,404 @@ +# Contributing Guide { #contributing} + +Welcome! There are many ways to contribute, including submitting bug +reports, improving documentation, submitting feature requests, reviewing +new submissions, or contributing code that can be incorporated into the +project. + +## Initial setup for local development + +### Install Git + +Please follow [instruction](https://docs.github.com/en/get-started/quickstart/set-up-git). + +### Create a fork + +If you are not a member of a development team building Data.SyncMaster, you should create a fork before making any changes. + +Please follow [instruction](https://docs.github.com/en/get-started/quickstart/fork-a-repo). + +### Clone the repo + +Open terminal and run these commands: + +```bash +git clone https://github.com/MobileTeleSystems/syncmaster -b develop + +cd syncmaster +``` + +### Setup environment + +Firstly, install [make](https://www.gnu.org/software/make/manual/make.html). It is used for running complex commands in local environment. + +Secondly, create virtualenv and install dependencies: + +```bash +make venv +``` + +If you already have venv, but need to install dependencies required for development: + +```bash +make venv-install +``` + +We are using [poetry](https://python-poetry.org/docs/managing-dependencies/) for managing dependencies and building the package. +It allows to keep development environment the same for all developers due to using lock file with fixed dependency versions. 
+ +There are *extra* dependencies (included into package as optional): + +* `server` - for running server +* `worker` - for running Celery workers + +And *groups* (not included into package, used locally and in CI): + +* `test` - for running tests +* `dev` - for development, like linters, formatters, mypy, pre-commit and so on +* `docs` - for building documentation + +### Enable pre-commit hooks + +[pre-commit](https://pre-commit.com/) hooks allows to validate & fix repository content before making new commit. +It allows to run linters, formatters, fix file permissions and so on. If something is wrong, changes cannot be committed. + +Firstly, install pre-commit hooks: + +```bash +pre-commit install --install-hooks +``` + +Ant then test hooks run: + +```bash +pre-commit run +``` + +## How to + +### Run development instance locally + +Start DB container: + +```bash +make db broker +``` + +Then start development server: + +```bash +make dev-server +``` + +And open [http://localhost:8000/docs](http://localhost:8000/docs) + +Settings are stored in `.env.local` file. + +To start development worker, open a new terminal window/tab, and run: + +```bash +make dev-worker +``` + +### Working with migrations + +Start database: + +```bash +make db-start +``` + +Generate revision: + +```bash +make db-revision ARGS="-m 'Message'" +``` + +Upgrade db to `head` migration: + +```bash +make db-upgrade +``` + +Downgrade db to `head-1` migration: + +```bash +make db-downgrade +``` + +### Run tests locally + +#### Unit tests + +This is as simple as: + +```bash +make test-unit +``` + +This command starts all necessary containers (Postgres, RabbitMQ), runs all necessary migrations, and then runs Pytest. + +You can pass additional arguments to pytest like this: + +```bash +make test-unit PYTEST_ARGS="-k some-test -lsx -vvvv --log-cli-level=INFO" +``` + +Get fixtures not used by any test: + +```bash +make test-check-fixtures +``` + +#### Integration tests + +!!! 
warning + To run HDFS and Hive tests locally you should add the following line to your `/etc/hosts` (file path depends on OS): + +``` +# HDFS/Hive server returns container hostname as connection address, causing error in DNS resolution +127.0.0.1 test-hive +``` + +To run specific integration tests: + +```bash +make test-integration-hdfs +``` + +This starts database, broker & worker containers, and also HDFS container. Then it runs only HDFS-related integration tests. + +To run full test suite: + +```bash +make test-integration +``` + +This starts all containers and runs all integration tests. + +Like unit tests, you can pass extra arguments to Pytest: + +```bash +make test-integration-hdfs PYTEST_ARGS="-k some-test -lsx -vvvv --log-cli-level=INFO" +``` + +Stop all containers and remove created volumes: + +```bash +make test-cleanup ARGS="-v" +``` + +#### Run production instance locally + +Firstly, build production images: + +```bash +make prod-build +``` + +And then start all necessary services: + +```bash +make prod +``` + +Then open [http://localhost:8000/docs](http://localhost:8000/docs) + +Settings are stored in `.env.docker` file. + +### Build documentation + +Build documentation using Sphinx & open it: + +```bash +make docs +``` + +If documentation should be build cleanly instead of reusing existing build result: + +```bash +make docs-fresh +``` + +## Review process + +Please create a new GitHub issue for any significant changes and +enhancements that you wish to make. Provide the feature you would like +to see, why you need it, and how it will work. Discuss your ideas +transparently and get community feedback before proceeding. + +Significant Changes that you wish to contribute to the project should be +discussed first in a GitHub issue that clearly outlines the changes and +benefits of the feature. + +Small Changes can directly be crafted and submitted to the GitHub +Repository as a Pull Request. 
+ +### Create pull request + +Commit your changes: + +```bash +git commit -m "Commit message" +git push +``` + +Then open Github interface and [create pull request](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#making-a-pull-request). +Please follow guide from PR body template. + +After pull request is created, it get a corresponding number, e.g. 123 (`pr_number`). + +### Write release notes + +Data.SyncMaster uses [towncrier](https://pypi.org/project/towncrier/) +for changelog management. + +To submit a change note about your PR, add a text file into the +[docs/changelog/next_release](./next_release) folder. It should contain an +explanation of what applying this PR will change in the way +end-users interact with the project. One sentence is usually +enough but feel free to add as many details as you feel necessary +for the users to understand what it means. + +**Use the past tense** for the text in your fragment because, +combined with others, it will be a part of the “news digest” +telling the readers **what changed** in a specific version of +the library *since the previous version*. + +reStructuredText syntax for highlighting code (inline or block), +linking parts of the docs or external sites. +If you wish to sign your change, feel free to add `-- by +:user:`github-username`` at the end (replace `github-username` +with your own!). + +Finally, name your file following the convention that Towncrier +understands: it should start with the number of an issue or a +PR followed by a dot, then add a patch type, like `feature`, +`doc`, `misc` etc., and add `.rst` as a suffix. If you +need to add more than one fragment, you may add an optional +sequence number (delimited with another period) between the type +and the suffix. + +In general the name will follow `..rst` pattern, +where the categories are: + +* `feature`: Any new feature. Adding new functionality that has not yet existed. 
+* `removal`: Signifying a deprecation or removal of public API. +* `bugfix`: A bug fix. +* `improvement`: An improvement. Improving functionality that already existed. +* `doc`: A change to the documentation. +* `dependency`: Dependency-related changes. +* `misc`: Changes internal to the repo like CI, test and build changes. +* `breaking`: introduces a breaking API change. +* `significant`: Indicates that significant changes have been made to the code. +* `dependency`: Indicates that there have been changes in dependencies. + +A pull request may have more than one of these components, for example +a code change may introduce a new feature that deprecates an old +feature, in which case two fragments should be added. It is not +necessary to make a separate documentation fragment for documentation +changes accompanying the relevant code changes. + +#### Examples for adding changelog entries to your Pull Requests + +```rst +Added a ``:github:user:`` role to Sphinx config -- by :github:user:`someuser` +``` + +```rst +Fixed behavior of ``server`` -- by :github:user:`someuser` +``` + +```rst +Added support of ``timeout`` in ``LDAP`` +-- by :github:user:`someuser`, :github:user:`anotheruser` and :github:user:`otheruser` +``` + +#### How to skip change notes check? + +Just add `ci:skip-changelog` label to pull request. + +#### Release Process + +Before making a release from the `develop` branch, follow these steps: + +1. Checkout to `develop` branch and update it to the actual state + +```bash +git checkout develop +git pull -p +``` + +1. Backup `NEXT_RELEASE.rst` + +```bash +cp "docs/changelog/NEXT_RELEASE.rst" "docs/changelog/temp_NEXT_RELEASE.rst" +``` + +1. Build the Release notes with Towncrier + +```bash +VERSION=$(poetry version -s) +towncrier build "--version=${VERSION}" --yes +``` + +1. Change file with changelog to release version number + +```bash +mv docs/changelog/NEXT_RELEASE.rst "docs/changelog/${VERSION}.rst" +``` + +1. 
Remove content above the version number heading in the `${VERSION}.rst` file + +```bash +awk '!/^.*towncrier release notes start/' "docs/changelog/${VERSION}.rst" > temp && mv temp "docs/changelog/${VERSION}.rst" +``` + +1. Update Changelog Index + +```bash +awk -v version=${VERSION} '/DRAFT/{print;print " " version;next}1' docs/changelog/index.rst > temp && mv temp docs/changelog/index.rst +``` + +1. Restore `NEXT_RELEASE.rst` file from backup + +```bash +mv "docs/changelog/temp_NEXT_RELEASE.rst" "docs/changelog/NEXT_RELEASE.rst" +``` + +1. Commit and push changes to `develop` branch + +```bash +git add . +git commit -m "Prepare for release ${VERSION}" +git push +``` + +1. Merge `develop` branch to `master`, **WITHOUT** squashing + +```bash +git checkout master +git pull +git merge develop +git push +``` + +1. Add git tag to the latest commit in `master` branch + +```bash +git tag "$VERSION" +git push origin "$VERSION" +``` + +1. Update version in `develop` branch **after release**: + +```bash +git checkout develop + +NEXT_VERSION=$(echo "$VERSION" | awk -F. '/[0-9]+\./{$NF++;print}' OFS=.) +poetry version "$NEXT_VERSION" + +git add . 
+git commit -m "Bump version" +git push +``` diff --git a/mddocs/design/entities/connection_info_db.png b/mddocs/design/entities/connection_info_db.png new file mode 100644 index 00000000..02d800d2 Binary files /dev/null and b/mddocs/design/entities/connection_info_db.png differ diff --git a/mddocs/design/entities/connection_info_fs.png b/mddocs/design/entities/connection_info_fs.png new file mode 100644 index 00000000..229f0cc0 Binary files /dev/null and b/mddocs/design/entities/connection_info_fs.png differ diff --git a/mddocs/design/entities/connection_list.png b/mddocs/design/entities/connection_list.png new file mode 100644 index 00000000..5a6f9d3c Binary files /dev/null and b/mddocs/design/entities/connection_list.png differ diff --git a/mddocs/design/entities/create_transfer_advanced.png b/mddocs/design/entities/create_transfer_advanced.png new file mode 100644 index 00000000..9b111190 Binary files /dev/null and b/mddocs/design/entities/create_transfer_advanced.png differ diff --git a/mddocs/design/entities/create_transfer_advanced_filter_columns.png b/mddocs/design/entities/create_transfer_advanced_filter_columns.png new file mode 100644 index 00000000..97acdc16 Binary files /dev/null and b/mddocs/design/entities/create_transfer_advanced_filter_columns.png differ diff --git a/mddocs/design/entities/create_transfer_advanced_filter_files.png b/mddocs/design/entities/create_transfer_advanced_filter_files.png new file mode 100644 index 00000000..73f2dc43 Binary files /dev/null and b/mddocs/design/entities/create_transfer_advanced_filter_files.png differ diff --git a/mddocs/design/entities/create_transfer_advanced_filter_rows.png b/mddocs/design/entities/create_transfer_advanced_filter_rows.png new file mode 100644 index 00000000..7aad8d60 Binary files /dev/null and b/mddocs/design/entities/create_transfer_advanced_filter_rows.png differ diff --git a/mddocs/design/entities/create_transfer_footer.png b/mddocs/design/entities/create_transfer_footer.png new file 
mode 100644 index 00000000..a051b066 Binary files /dev/null and b/mddocs/design/entities/create_transfer_footer.png differ diff --git a/mddocs/design/entities/create_transfer_head.png b/mddocs/design/entities/create_transfer_head.png new file mode 100644 index 00000000..7c6bf5cf Binary files /dev/null and b/mddocs/design/entities/create_transfer_head.png differ diff --git a/mddocs/design/entities/create_transfer_source_target.png b/mddocs/design/entities/create_transfer_source_target.png new file mode 100644 index 00000000..b8b19504 Binary files /dev/null and b/mddocs/design/entities/create_transfer_source_target.png differ diff --git a/mddocs/design/entities/group_add_member.png b/mddocs/design/entities/group_add_member.png new file mode 100644 index 00000000..9d88c8e2 Binary files /dev/null and b/mddocs/design/entities/group_add_member.png differ diff --git a/mddocs/design/entities/group_info.png b/mddocs/design/entities/group_info.png new file mode 100644 index 00000000..f502fbbc Binary files /dev/null and b/mddocs/design/entities/group_info.png differ diff --git a/mddocs/design/entities/group_list.png b/mddocs/design/entities/group_list.png new file mode 100644 index 00000000..b4493e25 Binary files /dev/null and b/mddocs/design/entities/group_list.png differ diff --git a/mddocs/design/entities/index.md b/mddocs/design/entities/index.md new file mode 100644 index 00000000..1cc9bb2b --- /dev/null +++ b/mddocs/design/entities/index.md @@ -0,0 +1,157 @@ +# Entities { #entities } + +## User + +SyncMaster is designed with multitenancy support and role-based access (see [Roles and permissions][role-permissions]). +All nteraction requires user authentication, there is no anonymous access allowed. + +Users are automatically after successful login, there is no special registration step. + +## Group + +All entity types (Connection, Transfer, Run, Queue) can be created only within some group. +Groups are independent from each other, and have globally unique name. 
+ +![image](group_list.png) + +![image](group_info.png) + +Group can be created by any user, which automatically get `OWNER` role. +This role allows adding members to the group, and assign them specific roles: + +![image](group_add_member.png) + +## Connection + +Connection describes how SyncMaster can access specific database or filesystem. It has a type (e.g. `s3`, `hive`, `postgres`), +connection parameters (e.g. `host`, `port`, `protocol`) and auth data (`user` / `password` combination). + +Connections have unique name within the group. + +![image](connection_list.png)![image](connection_info_db.png)![image](connection_info_fs.png) + +## Transfer + +Transfer is the heart of SyncMaster. It describes what some data should be fetched from a source (DB connection + table name, FileSystem connection + directory path), +and what the target is (DB or FileSystem). + +Transfers have unique name within a group. + +![image](transfer_list.png)![image](create_transfer_head.png)![image](create_transfer_source_target.png)![image](create_transfer_advanced.png) + +It is possible to add transformations between reading and writing steps: + +![image](create_transfer_advanced_filter_files.png)![image](create_transfer_advanced_filter_rows.png)![image](create_transfer_advanced_filter_columns.png) + +Other transfer features are: + +- Choose different read strategies (`full`, `incremental`) +- Execute transfer on schedule (hourly, daily, weekly and so on) +- Set specific resources (CPU, RAM) for each transfer run + +![image](create_transfer_footer.png) + +## Run + +Each time transfer is started (manually or at some schedule), SyncMaster creates dedicated Run +which tracks the ETL process status, URL to worker logs and so on. + +![image](run_list.png)![image](run_info.png) + +## Queue + +Queue allows to bind specific transfer to a set of SyncMaster [Worker][worker] + +Queue have unique name within a group, and globally unique `slug` field which is generated during queue creation. 
+ +![image](queue_list.png)![image](queue_info.png) + +Transfers cannot be created without queue. If there are no workers bound to a queue, created runs will not be executed. + +## Entity Diagram + +```mermaid +--- +title: Entity Diagram +--- +erDiagram + direction LR + User { + number id + string username + string is_active + string is_superuser + string created_at + string updated_at + } + + Group { + number id + string name + string description + number owner_id + string created_at + string updated_at + } + + Connection { + number id + number group_id + string type + string name + string description + string data + string created_at + string updated_at + } + + Queue { + number id + string name + string slug + string description + string created_at + string updated_at + } + + + Transfer { + number id + number group_id + string name + number source_connection_id + number target_connection_id + string strategy_params + string target_params + string transformations + string resources + string is_scheduled + string schedule + number queue_id + string created_at + string updated_at + } + + Run { + number id + number transfer_id + string started_at + string ended_at + string status + string type + string log_url + string transfer_dump + string created_at + string updated_at + } + + Run ||--o{ Transfer: contains + + Transfer ||--o{ Queue: contains + Transfer ||--o{ Connection: contains + Transfer ||--o{ Group: contains + Connection ||--o{ Group: contains + Queue ||--o{ Group: contains + Group }o--o{ User: contains + Group ||--o{ User: contains +``` diff --git a/mddocs/design/entities/queue_info.png b/mddocs/design/entities/queue_info.png new file mode 100644 index 00000000..9f1d844b Binary files /dev/null and b/mddocs/design/entities/queue_info.png differ diff --git a/mddocs/design/entities/queue_list.png b/mddocs/design/entities/queue_list.png new file mode 100644 index 00000000..29e44036 Binary files /dev/null and b/mddocs/design/entities/queue_list.png differ diff 
--git a/mddocs/design/entities/run_info.png b/mddocs/design/entities/run_info.png new file mode 100644 index 00000000..4f9c44fc Binary files /dev/null and b/mddocs/design/entities/run_info.png differ diff --git a/mddocs/design/entities/run_list.png b/mddocs/design/entities/run_list.png new file mode 100644 index 00000000..0dd37194 Binary files /dev/null and b/mddocs/design/entities/run_list.png differ diff --git a/mddocs/design/entities/transfer_list.png b/mddocs/design/entities/transfer_list.png new file mode 100644 index 00000000..94264f8c Binary files /dev/null and b/mddocs/design/entities/transfer_list.png differ diff --git a/mddocs/design/permissions.md b/mddocs/design/permissions.md new file mode 100644 index 00000000..7bf78005 --- /dev/null +++ b/mddocs/design/permissions.md @@ -0,0 +1,68 @@ +# Roles and permissions { #role-permissions } + +Object within the group can be seen/interacted with only by users which are members of the group. +Permissions are limited by role assigned to user within specific group. + +Roles are: + +- `GUEST` + Read-only access to objects within a group. +- `DEVELOPER` + Read-write (manage) connections, transfers and runs. Read-only for queues. +- `MAINTAINER` (DevOps): + Manage connections, transfers, runs and queues. +- `OWNER` (Product Owner) + Manage connections, transfers, runs, queues and user-group membership. Group can have only one owner. +- `SUPERUSER` (Admin) + Meta role assigned to specific users, NOT within group. All permissions, including ability to create/delete groups. + Superusers are created by [`manage-superusers-cli`][manage-superusers-cli]. 
+ +## Groups + +### Rights to work with the groups repository + +| Rule Role | Guest | Developer | Maintainer | Owner | Superuser | +|-------------|---------|-------------|--------------|---------|-------------| +| READ | x | x | x | x | x | +| UPDATE | | | | x | x | +| CREATE | x | x | x | x | x | +| DELETE | | | | | x | + +## Add user to the group and delete + +Each user has the right to remove himself from a group, regardless of his role in the group. + +### Rights to add/delete users to a group + +| Rule Role | Guest | Developer | Maintainer | Owner | Superuser | +|-------------|---------|-------------|--------------|---------|-------------| +| READ | x | x | x | x | x | +| ADD, UPDATE | | | | x | x | + +## Transfers, and Connections + +### Right to work with Transfers and Connections within a group + +| Rule Role | Guest | Developer | Maintainer | Owner | Superuser | +|----------------|---------|-------------|--------------|---------|-------------| +| READ | x | x | x | x | x | +| UPDATE, CREATE | | x | x | x | x | +| DELETE | | | x | x | x | + +## Runs + +### Right to work with Runs within a group + +| Rule Role | Guest | Developer | Maintainer | Owner | Superuser | +|----------------------|---------|-------------|--------------|---------|-------------| +| READ | x | x | x | x | x | +| CREATE (START), STOP | | x | x | x | x | + +## Queues + +### Rights to work with Queues within a namespace + +| Rule Role | Guest | Developer | Maintainer | Owner | Superuser | +|------------------------|---------|-------------|--------------|---------|-------------| +| READ | x | x | x | x | x | +| UPDATE, DELETE, CREATE | | | x | x | x | diff --git a/mddocs/index.md b/mddocs/index.md new file mode 100644 index 00000000..5978a352 --- /dev/null +++ b/mddocs/index.md @@ -0,0 +1,59 @@ +# Data.SyncMaster + +[![Repo Status](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) [![Docker 
image](https://img.shields.io/docker/v/mtsrus/syncmaster-server?sort=semver&label=docker)](https://hub.docker.com/r/mtsrus/syncmaster-server) [![PyPI](https://img.shields.io/pypi/v/data-syncmaster)](https://pypi.org/project/data-syncmaster/) [![PyPI License](https://img.shields.io/pypi/l/data-syncmaster.svg)](https://github.com/MobileTeleSystems/syncmaster/blob/develop/LICENSE.txt) [![PyPI Python Version](https://img.shields.io/pypi/pyversions/data-syncmaster.svg)](https://badge.fury.io/py/data-syncmaster) [![Documentation](https://readthedocs.org/projects/syncmaster/badge/?version=stable)](https://syncmaster.readthedocs.io) +[![Build Status](https://github.com/MobileTeleSystems/syncmaster/workflows/Run%20All%20Tests/badge.svg)](https://github.com/MobileTeleSystems/syncmaster/actions) [![Coverage](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/MTSOnGithub/03e73a82ecc4709934540ce8201cc3b4/raw/syncmaster_badge.json)](https://github.com/MobileTeleSystems/syncmaster/actions) [![pre-commit.ci](https://results.pre-commit.ci/badge/github/MobileTeleSystems/syncmaster/develop.svg)](https://results.pre-commit.ci/latest/github/MobileTeleSystems/syncmaster/develop) + +## What is Data.SyncMaster? + +Data.SyncMaster is as low-code ETL tool for transferring data between databases and file systems. +List of currently supported connections: + +* Apache Hive +* Clickhouse +* Postgres +* Oracle +* MSSQL +* MySQL +* HDFS +* S3 +* FTP +* FTPS +* SFTP +* Samba +* WebDAV + +Based on [onETL](https://onetl.readthedocs.io/) and [Apache Spark](https://spark.apache.org/). + +**Note**: service is under active development, and is not ready to use. 
+ +## Goals + +* Make transferring data between databases and file systems as simple as possible +* Provide a lot of builtin connectors to transfer data in heterogeneous environment +* RBAC and multitenancy support + +## Non-goals + +* This is not a backup system +* Only batch, no streaming + +High-level design + +* [Entities][entities] +* [Permissions][role-permissions] + +Reference + +* [Architecture][reference-architecture] +* [Database][database] +* [Broker][message-broker] +* [Server][server] +* [Frontend][frontend] +* [Worker][worker] +* [Scheduler][scheduler] + +Development + +* [Changelog][changelog] +* [Contributing][contributing] +* [Security][security] diff --git a/mddocs/make.bat b/mddocs/make.bat new file mode 100644 index 00000000..53ad1e82 --- /dev/null +++ b/mddocs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/mddocs/reference/architecture.md b/mddocs/reference/architecture.md new file mode 100644 index 00000000..47fd7099 --- /dev/null +++ b/mddocs/reference/architecture.md @@ -0,0 +1,16 @@ +# Architecture { #reference-architecture } + +## Components + +SyncMaster contains the following components: + +- [Frontend][frontend], main user interface. 
+- [REST API Server][server], providing REST API for fetching and manipulating entities. +- [Worker][worker], performing actual transfer work (ETL processes). +- [Scheduler][scheduler], scheduling transfers to be executed in future. +- [Relation Database][database] for storing internal data. +- [Message Broker][message-broker] for communications between Server/Scheduler and Worker. + +## Architecture diagram + +![image](_static/architecture.png) diff --git a/mddocs/reference/broker/index.md b/mddocs/reference/broker/index.md new file mode 100644 index 00000000..9db547d8 --- /dev/null +++ b/mddocs/reference/broker/index.md @@ -0,0 +1,234 @@ +# Message Broker { #message-broker } + +Message broker is componen used by [REST API Server][server]/[Scheduler][scheduler] to communicate with [Worker][worker]. + +SyncMaster can work virtually with any broker supported by [Celery](https://docs.celeryq.dev). +But the only broker we tested is [RabbitMQ](https://www.rabbitmq.com/). + +## Requirements + +- RabbitMQ 4.x. It is recommended to use latest RabbitMQ version. + +### Setup + +#### With Docker + +- Install [Docker](https://docs.docker.com/engine/install/) + +- Install [docker-compose](https://github.com/docker/compose/releases/) + +- Run the following command: + + ```console + $ docker compose --profile broker up -d --wait + ... + ``` + + `docker-compose` will download RabbitMQ image, create container and volume, and then start container. + Image entrypoint will create database if volume is empty. 
+ + Options can be set via `.env` file or `environment` section in `docker-compose.yml` + +### `docker-compose.yml` + + ``` + services: + db: + image: postgres:17 + restart: unless-stopped + environment: + POSTGRES_DB: syncmaster + POSTGRES_USER: syncmaster + POSTGRES_PASSWORD: changeme + ports: + - 5432:5432 + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: pg_isready + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + db-migrations: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: no + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + entrypoint: [python, -m, syncmaster.db.migrations, upgrade, head] + env_file: .env.docker + depends_on: + db: + condition: service_healthy + + rabbitmq: + image: rabbitmq:4 + restart: unless-stopped + ports: + - 5672:5672 + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: rabbitmq-diagnostics -q ping + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + server: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + ports: + - 8000:8000 + environment: + # list here usernames which should be assigned SUPERUSER role on application start + SYNCMASTER__ENTRYPOINT__SUPERUSERS: admin + # PROMETHEUS_MULTIPROC_DIR is required for multiple workers, see: + # https://prometheus.github.io/client_python/multiprocess/ + PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-metrics + # tmpfs dir is cleaned up each container restart + tmpfs: + - /tmp/prometheus-metrics:mode=1777 + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - server + - frontend + - all + + worker: + image: mtsrus/syncmaster-worker:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.worker + context: . 
+ target: prod + env_file: .env.docker + command: --loglevel=info -Q 123-test_queue # Queue.slug + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - worker + - all + + scheduler: + image: mtsrus/syncmaster-scheduler:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.scheduler + context: . + target: prod + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - scheduler + - all + + frontend: + image: mtsrus/syncmaster-ui:${VERSION:-develop} + restart: unless-stopped + env_file: .env.docker + ports: + - 3000:3000 + depends_on: + server: + condition: service_healthy + profiles: + - frontend + - all + + volumes: + postgres_data: + rabbitmq_data: + ``` + +### `.env.docker` + + ``` + TZ=UTC + ENV=LOCAL + + # Logging options + SYNCMASTER__LOGGING__SETUP=True + SYNCMASTER__LOGGING__PRESET=colored + + # Common DB options + SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + + # Encrypt / Decrypt credentials data using this Fernet key. + # !!! GENERATE YOUR OWN COPY FOR PRODUCTION USAGE !!! + SYNCMASTER__ENCRYPTION__SECRET_KEY=UBgPTioFrtH2unlC4XFDiGf5sYfzbdSf_VgiUSaQc94= + + # Common RabbitMQ options + SYNCMASTER__BROKER__URL=amqp://guest:guest@rabbitmq:5672 + + # Server options + SYNCMASTER__SERVER__SESSION__SECRET_KEY=generate_some_random_string + # !!! NEVER USE ON PRODUCTION !!! 
+ SYNCMASTER__SERVER__DEBUG=true + + # Keycloak Auth + #SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider + SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 + SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=generated_by_keycloak + SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback + SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email + SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False + + # Dummy Auth + SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.dummy_provider.DummyAuthProvider + SYNCMASTER__AUTH__ACCESS_TOKEN__SECRET_KEY=generate_another_random_string + + # Scheduler options + SYNCMASTER__SCHEDULER__TRANSFER_FETCHING_TIMEOUT_SECONDS=200 + + # Worker options + SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://logs.location.example.com/syncmaster-worker?correlation_id=\{\{ correlation_id \}\}&run_id=\{\{ run.id \}\} + SYNCMASTER__HWM_STORE__ENABLED=true + SYNCMASTER__HWM_STORE__TYPE=horizon + SYNCMASTER__HWM_STORE__URL=http://horizon:8000 + SYNCMASTER__HWM_STORE__NAMESPACE=syncmaster_namespace + SYNCMASTER__HWM_STORE__USER=admin + SYNCMASTER__HWM_STORE__PASSWORD=123UsedForTestOnly@! + + # Frontend options + SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 + + # Cors + SYNCMASTER__SERVER__CORS__ENABLED=True + SYNCMASTER__SERVER__CORS__ALLOW_ORIGINS=["http://localhost:3000"] + SYNCMASTER__SERVER__CORS__ALLOW_CREDENTIALS=True + SYNCMASTER__SERVER__CORS__ALLOW_METHODS=["*"] + SYNCMASTER__SERVER__CORS__ALLOW_HEADERS=["*"] + SYNCMASTER__SERVER__CORS__EXPOSE_HEADERS=["X-Request-ID","Location","Access-Control-Allow-Credentials"] + ``` + +#### Without Docker + +Please follow [RabbitMQ installation instruction](https://www.rabbitmq.com/docs/download). 
diff --git a/mddocs/reference/database/configuration.md b/mddocs/reference/database/configuration.md new file mode 100644 index 00000000..81db304b --- /dev/null +++ b/mddocs/reference/database/configuration.md @@ -0,0 +1,3 @@ +# Database settings { #configuration-database } + +::: syncmaster.settings.database.DatabaseSettings diff --git a/mddocs/reference/database/credentials_encryption.md b/mddocs/reference/database/credentials_encryption.md new file mode 100644 index 00000000..416bb38a --- /dev/null +++ b/mddocs/reference/database/credentials_encryption.md @@ -0,0 +1,3 @@ +# Credentials encryption { #configuration-credentials-encryption } + +::: syncmaster.settings.credentials.CredentialsEncryptionSettings diff --git a/mddocs/reference/database/index.md b/mddocs/reference/database/index.md new file mode 100644 index 00000000..8e47fbe2 --- /dev/null +++ b/mddocs/reference/database/index.md @@ -0,0 +1,293 @@ +# Relation Database { #database } + +SyncMaster requires relational database for storing internal data. + +Currently, SyncMaster supports only [PostgreSQL](https://www.postgresql.org/). + +## Migrations + +After a database is started, it is required to run migration script. +For empty database, it creates all the required tables and indexes. +For non-empty database, it will perform database structure upgrade, using [Alembic](https://alembic.sqlalchemy.org/). + +!!! warning + Other containers (server, scheduler, worker) should be stopped while running migrations, to prevent interference. + +## Requirements + +- PostgreSQL 12 or higher. It is recommended to use latest Postgres version. + +## Install & run + +### With Docker + +- Install [Docker](https://docs.docker.com/engine/install/) + +- Install [docker-compose](https://github.com/docker/compose/releases/) + +- Run the following command: + + ```console + $ docker compose up -d db db-migrations + ... 
+ ``` + + `docker-compose` will download PostgreSQL image, create container and volume, and then start container. + Image entrypoint will create database if volume is empty. + + After that, one-off container with migrations script will run. + + Options can be set via `.env` file or `environment` section in `docker-compose.yml` + +### `docker-compose.yml` + + ``` + services: + db: + image: postgres:17 + restart: unless-stopped + environment: + POSTGRES_DB: syncmaster + POSTGRES_USER: syncmaster + POSTGRES_PASSWORD: changeme + ports: + - 5432:5432 + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: pg_isready + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + db-migrations: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: no + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + entrypoint: [python, -m, syncmaster.db.migrations, upgrade, head] + env_file: .env.docker + depends_on: + db: + condition: service_healthy + + rabbitmq: + image: rabbitmq:4 + restart: unless-stopped + ports: + - 5672:5672 + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: rabbitmq-diagnostics -q ping + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + server: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.server + context: . 
+ target: prod + ports: + - 8000:8000 + environment: + # list here usernames which should be assigned SUPERUSER role on application start + SYNCMASTER__ENTRYPOINT__SUPERUSERS: admin + # PROMETHEUS_MULTIPROC_DIR is required for multiple workers, see: + # https://prometheus.github.io/client_python/multiprocess/ + PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-metrics + # tmpfs dir is cleaned up each container restart + tmpfs: + - /tmp/prometheus-metrics:mode=1777 + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - server + - frontend + - all + + worker: + image: mtsrus/syncmaster-worker:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.worker + context: . + target: prod + env_file: .env.docker + command: --loglevel=info -Q 123-test_queue # Queue.slug + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - worker + - all + + scheduler: + image: mtsrus/syncmaster-scheduler:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.scheduler + context: . 
+ target: prod + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - scheduler + - all + + frontend: + image: mtsrus/syncmaster-ui:${VERSION:-develop} + restart: unless-stopped + env_file: .env.docker + ports: + - 3000:3000 + depends_on: + server: + condition: service_healthy + profiles: + - frontend + - all + + volumes: + postgres_data: + rabbitmq_data: + ``` + +### `.env.docker` + + ``` + TZ=UTC + ENV=LOCAL + + # Logging options + SYNCMASTER__LOGGING__SETUP=True + SYNCMASTER__LOGGING__PRESET=colored + + # Common DB options + SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + + # Encrypt / Decrypt credentials data using this Fernet key. + # !!! GENERATE YOUR OWN COPY FOR PRODUCTION USAGE !!! + SYNCMASTER__ENCRYPTION__SECRET_KEY=UBgPTioFrtH2unlC4XFDiGf5sYfzbdSf_VgiUSaQc94= + + # Common RabbitMQ options + SYNCMASTER__BROKER__URL=amqp://guest:guest@rabbitmq:5672 + + # Server options + SYNCMASTER__SERVER__SESSION__SECRET_KEY=generate_some_random_string + # !!! NEVER USE ON PRODUCTION !!! 
+ SYNCMASTER__SERVER__DEBUG=true + + # Keycloak Auth + #SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider + SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 + SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=generated_by_keycloak + SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback + SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email + SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False + + # Dummy Auth + SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.dummy_provider.DummyAuthProvider + SYNCMASTER__AUTH__ACCESS_TOKEN__SECRET_KEY=generate_another_random_string + + # Scheduler options + SYNCMASTER__SCHEDULER__TRANSFER_FETCHING_TIMEOUT_SECONDS=200 + + # Worker options + SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://logs.location.example.com/syncmaster-worker?correlation_id=\{\{ correlation_id \}\}&run_id=\{\{ run.id \}\} + SYNCMASTER__HWM_STORE__ENABLED=true + SYNCMASTER__HWM_STORE__TYPE=horizon + SYNCMASTER__HWM_STORE__URL=http://horizon:8000 + SYNCMASTER__HWM_STORE__NAMESPACE=syncmaster_namespace + SYNCMASTER__HWM_STORE__USER=admin + SYNCMASTER__HWM_STORE__PASSWORD=123UsedForTestOnly@! + + # Frontend options + SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 + + # Cors + SYNCMASTER__SERVER__CORS__ENABLED=True + SYNCMASTER__SERVER__CORS__ALLOW_ORIGINS=["http://localhost:3000"] + SYNCMASTER__SERVER__CORS__ALLOW_CREDENTIALS=True + SYNCMASTER__SERVER__CORS__ALLOW_METHODS=["*"] + SYNCMASTER__SERVER__CORS__ALLOW_HEADERS=["*"] + SYNCMASTER__SERVER__CORS__EXPOSE_HEADERS=["X-Request-ID","Location","Access-Control-Allow-Credentials"] + ``` + +### Without Docker + +- For installing PostgreSQL, please follow [installation instruction](https://www.postgresql.org/download/). 
+ +- Install Python 3.11 or above + +- Create virtual environment + + ```console + $ python -m venv /some/.venv + $ source /some/.venv/activate + ... + ``` + +- Install `syncmaster` package with following *extra* dependencies: + + ```console + $ pip install syncmaster[postgres] + ... + ``` + +- Configure [Database connection][configuration-database] using environment variables, e.g. by creating `.env` file: + + ```console + $ export SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + ... + ``` + + And then read values from this file: + + ```console + $ source /some/.env + ... + ``` + +- Run migrations: + + ```console + $ python -m syncmaster.db.migrations upgrade head + ... + ``` + + This is a thin wrapper around [alembic cli](https://alembic.sqlalchemy.org/en/latest/tutorial.html#running-our-first-migration), + options and commands are just the same. + +> **This command should be executed after each upgrade to new Data.SyncMaster version.** + +## See also + +- [Database settings][configuration-database] +- [Credentials encryption][configuration-credentials-encryption] +- [Database structure][database-structure] diff --git a/mddocs/reference/database/structure.md b/mddocs/reference/database/structure.md new file mode 100644 index 00000000..764f40aa --- /dev/null +++ b/mddocs/reference/database/structure.md @@ -0,0 +1,108 @@ +# Database structure { #database-structure } + +% https://plantuml.com/en/ie-diagram + +```mermaid +--- +title: Database structure +--- +erDiagram + direction LR + User_Group { + bigint user_id PK + bigint group_id PK + varchar(255) role_id + } + + User { + bigint id PK + varchar(256) username + varchar(256) email null + varchar(256) first_name null + varchar(256) last_name null + varchar(256) middle_name null + boolean is_superuser + boolean is_active + timestamp created_at + timestamp updated_at + } + + Group { + bigint id PK + varchar(256) name + varchar(512) description + bigint owner_id + timestamptz 
created_at + timestamptz updated_at + tsquery search_vector + } + + Queue { + bigint id PK + varchar(128) name + varchar(256) slug + bigint group_id + varchar(512) description + timestamptz created_at + timestamptz updated_at + } + + Connection { + bigint id PK + bigint group_id + varchar(32) type + varchar(123) name + varchar(512) description + json data + timestamptz created_at + timestamptz updated_at + tsquery search_vector + } + + Auth_data { + bigint connection_id PK + text value + timestamptz created_at + timestamptz updated_at + } + + Transfer { + bigint id PK + bigint group_id + varchar(128) name + bigint source_connection_id + bigint target_connection_id + json strategy_params + json target_params + json transformations + json resources + boolean is_scheduled + varchar(32) schedule + bigint queue_id + timestamptz created_at + timestamptz updated_at + } + + Run { + bigint id PK + transfer_id bigint + timestamptz started_at + timestamptz ended_at + varchar(255) status + varchar(64) type_run + json transfer_dump + timestamptz created_at + timestamptz updated_at + } + + User_Group ||--o{ User: contains + User_Group ||--o{ Group: contains + Group ||--o{ User: contains + Queue ||--o{ Group: contains + Connection ||--o{ Group: contains + Auth_data ||--o{ Connection: contains + Transfer ||--o{ Queue: contains + Transfer ||--o{ Connection: contains + Transfer ||--o{ Group: contains + Run ||--o{ Transfer: contains +``` diff --git a/mddocs/reference/frontend/configuration.md b/mddocs/reference/frontend/configuration.md new file mode 100644 index 00000000..6e1d0a01 --- /dev/null +++ b/mddocs/reference/frontend/configuration.md @@ -0,0 +1,20 @@ +# Frontend configuration { #configuration-frontend } + +## API url + +SyncMaster UI requires REST API to be accessible from browser. API url is set up using environment variable: + +```bash +SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 +``` + +If both REST API and frontend are served on the same domain (e.g. 
through an Nginx reverse proxy):
+ target: prod + entrypoint: [python, -m, syncmaster.db.migrations, upgrade, head] + env_file: .env.docker + depends_on: + db: + condition: service_healthy + + rabbitmq: + image: rabbitmq:4 + restart: unless-stopped + ports: + - 5672:5672 + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: rabbitmq-diagnostics -q ping + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + server: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + ports: + - 8000:8000 + environment: + # list here usernames which should be assigned SUPERUSER role on application start + SYNCMASTER__ENTRYPOINT__SUPERUSERS: admin + # PROMETHEUS_MULTIPROC_DIR is required for multiple workers, see: + # https://prometheus.github.io/client_python/multiprocess/ + PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-metrics + # tmpfs dir is cleaned up each container restart + tmpfs: + - /tmp/prometheus-metrics:mode=1777 + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - server + - frontend + - all + + worker: + image: mtsrus/syncmaster-worker:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.worker + context: . + target: prod + env_file: .env.docker + command: --loglevel=info -Q 123-test_queue # Queue.slug + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - worker + - all + + scheduler: + image: mtsrus/syncmaster-scheduler:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.scheduler + context: . 
+ target: prod + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - scheduler + - all + + frontend: + image: mtsrus/syncmaster-ui:${VERSION:-develop} + restart: unless-stopped + env_file: .env.docker + ports: + - 3000:3000 + depends_on: + server: + condition: service_healthy + profiles: + - frontend + - all + + volumes: + postgres_data: + rabbitmq_data: + ``` + + ### `.env.docker` + + ``` + TZ=UTC + ENV=LOCAL + + # Logging options + SYNCMASTER__LOGGING__SETUP=True + SYNCMASTER__LOGGING__PRESET=colored + + # Common DB options + SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + + # Encrypt / Decrypt credentials data using this Fernet key. + # !!! GENERATE YOUR OWN COPY FOR PRODUCTION USAGE !!! + SYNCMASTER__ENCRYPTION__SECRET_KEY=UBgPTioFrtH2unlC4XFDiGf5sYfzbdSf_VgiUSaQc94= + + # Common RabbitMQ options + SYNCMASTER__BROKER__URL=amqp://guest:guest@rabbitmq:5672 + + # Server options + SYNCMASTER__SERVER__SESSION__SECRET_KEY=generate_some_random_string + # !!! NEVER USE ON PRODUCTION !!! 
+ SYNCMASTER__SERVER__DEBUG=true + + # Keycloak Auth + #SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider + SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 + SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=generated_by_keycloak + SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback + SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email + SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False + + # Dummy Auth + SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.dummy_provider.DummyAuthProvider + SYNCMASTER__AUTH__ACCESS_TOKEN__SECRET_KEY=generate_another_random_string + + # Scheduler options + SYNCMASTER__SCHEDULER__TRANSFER_FETCHING_TIMEOUT_SECONDS=200 + + # Worker options + SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://logs.location.example.com/syncmaster-worker?correlation_id=\{\{ correlation_id \}\}&run_id=\{\{ run.id \}\} + SYNCMASTER__HWM_STORE__ENABLED=true + SYNCMASTER__HWM_STORE__TYPE=horizon + SYNCMASTER__HWM_STORE__URL=http://horizon:8000 + SYNCMASTER__HWM_STORE__NAMESPACE=syncmaster_namespace + SYNCMASTER__HWM_STORE__USER=admin + SYNCMASTER__HWM_STORE__PASSWORD=123UsedForTestOnly@! + + # Frontend options + SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 + + # Cors + SYNCMASTER__SERVER__CORS__ENABLED=True + SYNCMASTER__SERVER__CORS__ALLOW_ORIGINS=["http://localhost:3000"] + SYNCMASTER__SERVER__CORS__ALLOW_CREDENTIALS=True + SYNCMASTER__SERVER__CORS__ALLOW_METHODS=["*"] + SYNCMASTER__SERVER__CORS__ALLOW_HEADERS=["*"] + SYNCMASTER__SERVER__CORS__EXPOSE_HEADERS=["X-Request-ID","Location","Access-Control-Allow-Credentials"] + ``` + +- After frontend is started and ready, open [http://localhost:3000](http://localhost:3000). 
+ +## See also + +- [Frontend configuration][configuration-frontend] diff --git a/mddocs/reference/scheduler/configuration/broker.md b/mddocs/reference/scheduler/configuration/broker.md new file mode 100644 index 00000000..a9a29583 --- /dev/null +++ b/mddocs/reference/scheduler/configuration/broker.md @@ -0,0 +1,3 @@ +# Broker settings { #scheduler-configuration-broker } + +::: syncmaster.settings.broker.RabbitMQSettings diff --git a/mddocs/reference/scheduler/configuration/credentials.md b/mddocs/reference/scheduler/configuration/credentials.md new file mode 100644 index 00000000..b1988d63 --- /dev/null +++ b/mddocs/reference/scheduler/configuration/credentials.md @@ -0,0 +1,3 @@ +# Credentials settings { #scheduler-configuration-credentials } + +::: syncmaster.settings.credentials.CredentialsEncryptionSettings diff --git a/mddocs/reference/scheduler/configuration/database.md b/mddocs/reference/scheduler/configuration/database.md new file mode 100644 index 00000000..e78d7c7b --- /dev/null +++ b/mddocs/reference/scheduler/configuration/database.md @@ -0,0 +1,3 @@ +# Database settings { #scheduler-configuration-database } + +::: syncmaster.settings.database.DatabaseSettings diff --git a/mddocs/reference/scheduler/configuration/index.md b/mddocs/reference/scheduler/configuration/index.md new file mode 100644 index 00000000..01254d87 --- /dev/null +++ b/mddocs/reference/scheduler/configuration/index.md @@ -0,0 +1,10 @@ +# Configuration { #scheduler-configuration } + +* [Database][scheduler-configuration-database] +* [Broker][scheduler-configuration-broker] +* [Credentials][scheduler-configuration-credentials] +* [Logging][scheduler-configuration-logging] + +::: syncmaster.scheduler.settings.SchedulerAppSettings + +::: syncmaster.scheduler.settings.SchedulerSettings diff --git a/mddocs/reference/scheduler/configuration/logging.md b/mddocs/reference/scheduler/configuration/logging.md new file mode 100644 index 00000000..57ebb23f --- /dev/null +++ 
b/mddocs/reference/scheduler/configuration/logging.md @@ -0,0 +1,3 @@ +# Logging settings { #scheduler-configuration-logging } + +::: syncmaster.settings.log.LoggingSettings diff --git a/mddocs/reference/scheduler/index.md b/mddocs/reference/scheduler/index.md new file mode 100644 index 00000000..ccd4267e --- /dev/null +++ b/mddocs/reference/scheduler/index.md @@ -0,0 +1,261 @@ +# Scheduler { #scheduler } + +SyncMaster scheduler is a dedicated process which periodically checks scheduler Transfers in [database][database], +and creates corresponding Runs in [message-broker][message-broker]. + +Implemented using [APScheduler](https://github.com/agronholm/apscheduler). + +## Install & run + +### With docker + +- Install [Docker](https://docs.docker.com/engine/install/) + +- Install [docker-compose](https://github.com/docker/compose/releases/) + +- Run the following command: + + ```console + $ docker compose --profile scheduler up -d --wait + ... + ``` + + `docker-compose` will download all necessary images, create containers, and then start the scheduler. + + Options can be set via `.env` file or `environment` section in `docker-compose.yml` + + ### `docker-compose.yml` + + ``` + services: + db: + image: postgres:17 + restart: unless-stopped + environment: + POSTGRES_DB: syncmaster + POSTGRES_USER: syncmaster + POSTGRES_PASSWORD: changeme + ports: + - 5432:5432 + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: pg_isready + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + db-migrations: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: no + build: + dockerfile: docker/Dockerfile.server + context: . 
+ target: prod + entrypoint: [python, -m, syncmaster.db.migrations, upgrade, head] + env_file: .env.docker + depends_on: + db: + condition: service_healthy + + rabbitmq: + image: rabbitmq:4 + restart: unless-stopped + ports: + - 5672:5672 + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: rabbitmq-diagnostics -q ping + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + server: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + ports: + - 8000:8000 + environment: + # list here usernames which should be assigned SUPERUSER role on application start + SYNCMASTER__ENTRYPOINT__SUPERUSERS: admin + # PROMETHEUS_MULTIPROC_DIR is required for multiple workers, see: + # https://prometheus.github.io/client_python/multiprocess/ + PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-metrics + # tmpfs dir is cleaned up each container restart + tmpfs: + - /tmp/prometheus-metrics:mode=1777 + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - server + - frontend + - all + + worker: + image: mtsrus/syncmaster-worker:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.worker + context: . + target: prod + env_file: .env.docker + command: --loglevel=info -Q 123-test_queue # Queue.slug + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - worker + - all + + scheduler: + image: mtsrus/syncmaster-scheduler:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.scheduler + context: . 
+ target: prod + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - scheduler + - all + + frontend: + image: mtsrus/syncmaster-ui:${VERSION:-develop} + restart: unless-stopped + env_file: .env.docker + ports: + - 3000:3000 + depends_on: + server: + condition: service_healthy + profiles: + - frontend + - all + + volumes: + postgres_data: + rabbitmq_data: + ``` + + ### `.env.docker` + + ``` + TZ=UTC + ENV=LOCAL + + # Logging options + SYNCMASTER__LOGGING__SETUP=True + SYNCMASTER__LOGGING__PRESET=colored + + # Common DB options + SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + + # Encrypt / Decrypt credentials data using this Fernet key. + # !!! GENERATE YOUR OWN COPY FOR PRODUCTION USAGE !!! + SYNCMASTER__ENCRYPTION__SECRET_KEY=UBgPTioFrtH2unlC4XFDiGf5sYfzbdSf_VgiUSaQc94= + + # Common RabbitMQ options + SYNCMASTER__BROKER__URL=amqp://guest:guest@rabbitmq:5672 + + # Server options + SYNCMASTER__SERVER__SESSION__SECRET_KEY=generate_some_random_string + # !!! NEVER USE ON PRODUCTION !!! 
+ SYNCMASTER__SERVER__DEBUG=true + + # Keycloak Auth + #SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider + SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 + SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=generated_by_keycloak + SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback + SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email + SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False + + # Dummy Auth + SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.dummy_provider.DummyAuthProvider + SYNCMASTER__AUTH__ACCESS_TOKEN__SECRET_KEY=generate_another_random_string + + # Scheduler options + SYNCMASTER__SCHEDULER__TRANSFER_FETCHING_TIMEOUT_SECONDS=200 + + # Worker options + SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://logs.location.example.com/syncmaster-worker?correlation_id=\{\{ correlation_id \}\}&run_id=\{\{ run.id \}\} + SYNCMASTER__HWM_STORE__ENABLED=true + SYNCMASTER__HWM_STORE__TYPE=horizon + SYNCMASTER__HWM_STORE__URL=http://horizon:8000 + SYNCMASTER__HWM_STORE__NAMESPACE=syncmaster_namespace + SYNCMASTER__HWM_STORE__USER=admin + SYNCMASTER__HWM_STORE__PASSWORD=123UsedForTestOnly@! 
+ + # Frontend options + SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 + + # Cors + SYNCMASTER__SERVER__CORS__ENABLED=True + SYNCMASTER__SERVER__CORS__ALLOW_ORIGINS=["http://localhost:3000"] + SYNCMASTER__SERVER__CORS__ALLOW_CREDENTIALS=True + SYNCMASTER__SERVER__CORS__ALLOW_METHODS=["*"] + SYNCMASTER__SERVER__CORS__ALLOW_HEADERS=["*"] + SYNCMASTER__SERVER__CORS__EXPOSE_HEADERS=["X-Request-ID","Location","Access-Control-Allow-Credentials"] + ``` + +### Without docker + +- Install Python 3.11 or above + +- Setup [Relation Database][database], run migrations + +- Setup [Message Broker][message-broker] + +- Create virtual environment + + ```console + $ python -m venv /some/.venv + $ source /some/.venv/activate + ... + ``` + +- Install `syncmaster` package with following *extra* dependencies: + + ```console + $ pip install syncmaster[scheduler] + ... + ``` + +- Run scheduler process: + + ```console + $ python -m syncmaster.Scheduler + ... + ``` + + Scheduler currently don't have any command line arguments. + +## See also + +- [Configuration][scheduler-configuration] diff --git a/mddocs/reference/scheduler/install.md b/mddocs/reference/scheduler/install.md new file mode 100644 index 00000000..b2fc46db --- /dev/null +++ b/mddocs/reference/scheduler/install.md @@ -0,0 +1,248 @@ +# Install & run scheduler { #server-install } + +## With docker + +### Installation process + +- Install [Docker](https://docs.docker.com/engine/install/) + +- Install [docker-compose](https://github.com/docker/compose/releases/) + +- Run the following command: + + ```console + $ docker compose --profile scheduler up -d --wait + ... + ``` + + `docker-compose` will download all necessary images, create containers, and then start the server. 
+ + Options can be set via `.env` file or `environment` section in `docker-compose.yml` + +### `docker-compose.yml` + +``` +services: + db: + image: postgres:17 + restart: unless-stopped + environment: + POSTGRES_DB: syncmaster + POSTGRES_USER: syncmaster + POSTGRES_PASSWORD: changeme + ports: + - 5432:5432 + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: pg_isready + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + db-migrations: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: no + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + entrypoint: [python, -m, syncmaster.db.migrations, upgrade, head] + env_file: .env.docker + depends_on: + db: + condition: service_healthy + + rabbitmq: + image: rabbitmq:4 + restart: unless-stopped + ports: + - 5672:5672 + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: rabbitmq-diagnostics -q ping + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + server: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + ports: + - 8000:8000 + environment: + # list here usernames which should be assigned SUPERUSER role on application start + SYNCMASTER__ENTRYPOINT__SUPERUSERS: admin + # PROMETHEUS_MULTIPROC_DIR is required for multiple workers, see: + # https://prometheus.github.io/client_python/multiprocess/ + PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-metrics + # tmpfs dir is cleaned up each container restart + tmpfs: + - /tmp/prometheus-metrics:mode=1777 + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - server + - frontend + - all + + worker: + image: mtsrus/syncmaster-worker:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.worker + context: . 
+ target: prod + env_file: .env.docker + command: --loglevel=info -Q 123-test_queue # Queue.slug + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - worker + - all + + scheduler: + image: mtsrus/syncmaster-scheduler:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.scheduler + context: . + target: prod + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - scheduler + - all + + frontend: + image: mtsrus/syncmaster-ui:${VERSION:-develop} + restart: unless-stopped + env_file: .env.docker + ports: + - 3000:3000 + depends_on: + server: + condition: service_healthy + profiles: + - frontend + - all + +volumes: + postgres_data: + rabbitmq_data: +``` + +### `.env.docker` + +``` +TZ=UTC +ENV=LOCAL + +# Logging options +SYNCMASTER__LOGGING__SETUP=True +SYNCMASTER__LOGGING__PRESET=colored + +# Common DB options +SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + +# Encrypt / Decrypt credentials data using this Fernet key. +# !!! GENERATE YOUR OWN COPY FOR PRODUCTION USAGE !!! +SYNCMASTER__ENCRYPTION__SECRET_KEY=UBgPTioFrtH2unlC4XFDiGf5sYfzbdSf_VgiUSaQc94= + +# Common RabbitMQ options +SYNCMASTER__BROKER__URL=amqp://guest:guest@rabbitmq:5672 + +# Server options +SYNCMASTER__SERVER__SESSION__SECRET_KEY=generate_some_random_string +# !!! NEVER USE ON PRODUCTION !!! 
+SYNCMASTER__SERVER__DEBUG=true + +# Keycloak Auth +#SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider +SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 +SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=manually_created +SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=manually_created +SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=generated_by_keycloak +SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback +SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email +SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False + +# Dummy Auth +SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.dummy_provider.DummyAuthProvider +SYNCMASTER__AUTH__ACCESS_TOKEN__SECRET_KEY=generate_another_random_string + +# Scheduler options +SYNCMASTER__SCHEDULER__TRANSFER_FETCHING_TIMEOUT_SECONDS=200 + +# Worker options +SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://logs.location.example.com/syncmaster-worker?correlation_id=\{\{ correlation_id \}\}&run_id=\{\{ run.id \}\} +SYNCMASTER__HWM_STORE__ENABLED=true +SYNCMASTER__HWM_STORE__TYPE=horizon +SYNCMASTER__HWM_STORE__URL=http://horizon:8000 +SYNCMASTER__HWM_STORE__NAMESPACE=syncmaster_namespace +SYNCMASTER__HWM_STORE__USER=admin +SYNCMASTER__HWM_STORE__PASSWORD=123UsedForTestOnly@! + +# Frontend options +SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 + +# Cors +SYNCMASTER__SERVER__CORS__ENABLED=True +SYNCMASTER__SERVER__CORS__ALLOW_ORIGINS=["http://localhost:3000"] +SYNCMASTER__SERVER__CORS__ALLOW_CREDENTIALS=True +SYNCMASTER__SERVER__CORS__ALLOW_METHODS=["*"] +SYNCMASTER__SERVER__CORS__ALLOW_HEADERS=["*"] +SYNCMASTER__SERVER__CORS__EXPOSE_HEADERS=["X-Request-ID","Location","Access-Control-Allow-Credentials"] +``` + +## Without docker + +- Install Python 3.11 or above + +- Setup [Relation Database][database], run migrations and create partitions + +- Create virtual environment + + ```console + $ python -m venv /some/.venv + $ source /some/.venv/activate + ... 
+ ``` + +- Install `syncmaster` package with following *extra* dependencies: + + ```console + $ pip install syncmaster[scheduler] + ... + ``` + +- Run scheduler process + + ```console + $ python -m syncmaster.scheduler + ... + ``` diff --git a/mddocs/reference/server/auth/custom.md b/mddocs/reference/server/auth/custom.md new file mode 100644 index 00000000..41abbacb --- /dev/null +++ b/mddocs/reference/server/auth/custom.md @@ -0,0 +1,5 @@ +# Custom Auth provider { #server-auth-custom } + +You can implement custom auth provider by inheriting from class below and implementing necessary methods. + +::: syncmaster.server.providers.auth.AuthProvider diff --git a/mddocs/reference/server/auth/dummy.md b/mddocs/reference/server/auth/dummy.md new file mode 100644 index 00000000..81d6bee1 --- /dev/null +++ b/mddocs/reference/server/auth/dummy.md @@ -0,0 +1,69 @@ +# Dummy Auth provider { #server-auth-dummy } + +## Description { #server-auth-dummy-description } + +This auth provider allows to sign-in with any username and password, and and then issues an access token. + +After successful auth, username is saved to server database. 
+ +## Interaction schema { #server-auth-dummy-interaction-shema} + +```mermaid +sequenceDiagram +participant "Client" +participant "Server" + +activate "Client" +alt Successful case +"Client" ->> "Server" : login + password +"Server" ->> "Server" : Password is completely ignored +"Server" ->> "Server" : Check user in internal server database +"Server" ->> "Server" : Create user if not exist +"Server" ->> "Client" : Generate and return access_token + +else User is blocked +"Client" ->> "Server" : login + password +"Server" ->> "Server" : Password is completely ignored +"Server" ->> "Server" : Check user in internal server database +"Server" --x "Client" : 401 Unauthorized + +else User is deleted +"Client" ->> "Server" : login + password +"Server" ->> "Server" : Password is completely ignored +"Server" ->> "Server" : Check user in internal server database +"Server" --x "Client" : 404 Not found +end + +alt Successful case +"Client" ->> "Server" : access_token +"Server" ->> "Server" : Validate token +"Server" ->> "Server" : Check user in internal server database +"Server" ->> "Server" : Get data +"Server" ->> "Client" : Return data + +else Token is expired +"Client" ->> "Server" : access_token +"Server" ->> "Server" : Validate token +"Server" --x "Client" : 401 Unauthorized + +else User is blocked +"Client" ->> "Server" : access_token +"Server" ->> "Server" : Validate token +"Server" ->> "Server" : Check user in internal server database +"Server" --x "Client" : 401 Unauthorized + +else User is deleted +"Client" ->> "Server" : access_token +"Server" ->> "Server" : Validate token +"Server" ->> "Server" : Check user in internal server database +"Server" --x "Client" : 404 Not found +end + +deactivate "Client" +``` + +## Configuration { #server-auth-dummy-configuration } + +::: syncmaster.server.settings.auth.dummy.DummyAuthProviderSettings + +::: syncmaster.server.settings.auth.jwt.JWTSettings diff --git a/mddocs/reference/server/auth/index.md 
b/mddocs/reference/server/auth/index.md new file mode 100644 index 00000000..0b95f2d6 --- /dev/null +++ b/mddocs/reference/server/auth/index.md @@ -0,0 +1,20 @@ +# Auth Providers { #server-auth-providers } + +Syncmaster supports different auth provider implementations. You can change implementation via settings: + +::: syncmaster.server.settings.auth.AuthSettings + +## Auth providers + +* [Dummy Auth provider][server-auth-dummy] + * [Description][server-auth-dummy-description] + * [Interaction schema][server-auth-dummy-interaction-shema] + * [Configuration][server-auth-dummy-configuration] +* [KeyCloak Auth provider][keycloak-auth-provider] + * [Description][keycloak-auth-provider-description] + * [Interaction schema][keycloak-auth-provider-interaction-schema] + * [Basic configuration][keycloak-auth-provider-basic-configuration] + +## For developers + +* [Custom Auth provider][server-auth-custom] diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-client-authentication.png b/mddocs/reference/server/auth/keycloak/images/keycloak-client-authentication.png new file mode 100644 index 00000000..126067a3 Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-client-authentication.png differ diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-client-redirect_uri.png b/mddocs/reference/server/auth/keycloak/images/keycloak-client-redirect_uri.png new file mode 100644 index 00000000..a68b812c Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-client-redirect_uri.png differ diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-client-secret.png b/mddocs/reference/server/auth/keycloak/images/keycloak-client-secret.png new file mode 100644 index 00000000..2a5f7edf Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-client-secret.png differ diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-login.png 
b/mddocs/reference/server/auth/keycloak/images/keycloak-login.png new file mode 100644 index 00000000..f6c0b2b8 Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-login.png differ diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-new-client.png b/mddocs/reference/server/auth/keycloak/images/keycloak-new-client.png new file mode 100644 index 00000000..7eb31977 Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-new-client.png differ diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-new-client_name.png b/mddocs/reference/server/auth/keycloak/images/keycloak-new-client_name.png new file mode 100644 index 00000000..58cd16ce Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-new-client_name.png differ diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-new-realm.png b/mddocs/reference/server/auth/keycloak/images/keycloak-new-realm.png new file mode 100644 index 00000000..0819822b Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-new-realm.png differ diff --git a/mddocs/reference/server/auth/keycloak/images/keycloak-new-realm_name.png b/mddocs/reference/server/auth/keycloak/images/keycloak-new-realm_name.png new file mode 100644 index 00000000..5d152016 Binary files /dev/null and b/mddocs/reference/server/auth/keycloak/images/keycloak-new-realm_name.png differ diff --git a/mddocs/reference/server/auth/keycloak/index.md b/mddocs/reference/server/auth/keycloak/index.md new file mode 100644 index 00000000..f847605a --- /dev/null +++ b/mddocs/reference/server/auth/keycloak/index.md @@ -0,0 +1,76 @@ +# KeyCloak Auth provider { #keycloak-auth-provider } + +## Description { #keycloak-auth-provider-description } + +Keycloak auth provider uses [python-keycloak](https://pypi.org/project/python-keycloak/) library to interact with Keycloak server. 
During the authentication process, +KeycloakAuthProvider redirects user to Keycloak authentication page. + +After successful authentication, Keycloak redirects user back to Syncmaster with authorization code. +Then KeycloakAuthProvider exchanges authorization code for an access token and uses it to get user information from Keycloak server. +If user is not found in Syncmaster database, KeycloakAuthProvider creates it. Finally, KeycloakAuthProvider returns user with access token. + +You can follow interaction schema below. + +## Interaction schema { #keycloak-auth-provider-interaction-schema } + +```mermaid +sequenceDiagram +participant "Client (User from Browser)" as Client +participant "Syncmaster" +participant "Keycloak" + +Client ->> Syncmaster : Request endpoint that requires authentication (/v1/users) + +Syncmaster --x Client: Redirect to Keycloak login URL (if no access token) + +Client ->> Keycloak : Callback redirect to Keycloak login page + +alt Successful login +Client ->> Keycloak : Log in with login and password +else Login failed +Keycloak --x Client: Display error (401 Unauthorized) +end + +Keycloak ->> Client : Redirect to Syncmaster to callback endpoint with code +Client ->> Syncmaster : Callback request to /v1/auth/callback with code +Syncmaster->> Keycloak : Exchange code for access token +Keycloak ->> Syncmaster : Return JWT token +Syncmaster ->> Client : Set JWT token in user's browser in cookies and redirect /v1/users + +Client ->> Syncmaster : Redirect to /v1/users +Syncmaster ->> Syncmaster : Get user info from JWT token and check user in internal server database +Syncmaster ->> Syncmaster : Create user in internal server database if not exist +Syncmaster ->> Client: Return requested data + + +alt Successful case +Client ->> Syncmaster : Request data with JWT token +Syncmaster ->> Syncmaster : Get user info from JWT token and check user in internal server database +Syncmaster ->> Syncmaster : Create user in internal server database if not 
exist +Syncmaster ->> Client: Return requested data + +else Access token is expired +Syncmaster ->> Keycloak : Get new JWT token via refresh token +Keycloak ->> Syncmaster : Return new JWT token +Syncmaster ->> Syncmaster : Get user info from JWT token and check user in internal server database +Syncmaster ->> Syncmaster : Create user in internal server database if not exist +Syncmaster ->> Client: Return requested data and set new JWT token in user's browser in cookies + +else Refresh token is expired +Syncmaster --x Client: Redirect to Keycloak login URL +end + +deactivate Client +``` + +## Basic configuration { #keycloak-auth-provider-basic-configuration } + +::: syncmaster.server.settings.auth.keycloak.KeycloakAuthProviderSettings + +::: syncmaster.server.settings.auth.keycloak.KeycloakSettings + +::: syncmaster.server.settings.auth.jwt.JWTSettings + +## Keycloak + +- [Local installation][local_installation] diff --git a/mddocs/reference/server/auth/keycloak/local_installation.md b/mddocs/reference/server/auth/keycloak/local_installation.md new file mode 100644 index 00000000..13abb403 --- /dev/null +++ b/mddocs/reference/server/auth/keycloak/local_installation.md @@ -0,0 +1,92 @@ +# Local installation and testing { #local_installation } + +You can test Keycloak auth locally with docker compose: + +```console +$ docker compose -f docker-compose.test.yml up keycloak -d +... +``` + +## Authorize in keycloak + +At first, you have to go to [http://localhost:8080/admin](http://localhost:8080/admin) and login via login: `admin`, password: `admin` (by default) to create realms. + +![image](images/keycloak-login.png) + +## Create new realm + +![image](images/keycloak-new-realm.png) + +## Create new realm name + +Pass realm name value. Then pass it to `SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME` environment variable: + +```console +$ export SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=fastapi_realm # as on screen below +... 
+``` + +![image](images/keycloak-new-realm_name.png) + +## Create new client + +![image](images/keycloak-new-client.png) + +## Create new client name + +In created realm pass client name value. Then pass it to `SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID` environment variable: + +```console +$ export SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=fastapi_client # as on screen below +... +``` + +![image](images/keycloak-new-client_name.png) + +## Set `client_authentication` **ON** to receive client_secret + +![image](images/keycloak-client-authentication.png) + +## Configure Redirect URI + +To configure the redirect URI where the browser will redirect to exchange the code provided from Keycloak for an access token, set the `SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI` environment variable. The default value for local development is `http://localhost:8000/auth/callback`. + +```console +$ export SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback +... +``` + +## Configure the client redirect URI + +Ensure that this URI is also configured as a valid redirect URI in your Keycloak client settings. This allows the browser to redirect to your application after the user successfully authenticates with Keycloak. + +![image](images/keycloak-client-redirect_uri.png) + +## Configure the client secret + +Now go to **Credentials** tab and add the client secret to the `SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET` environment variable: + +```console +$ export SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=6x6gn8uJdWSBmP8FqbNRSoGdvaoaFeez # as on screen below +... +``` + +![image](images/keycloak-client-secret.png) + +Now you can use create users in this realms, check [keycloak documentation](https://www.keycloak.org/docs/latest/server_admin/#assembly-managing-users_server_administration_guide) on how to manage users creation. 
+ +## ENVIRONMENT VARIABLES + +After this you can user `KeycloakAuthProvider` in your application with provided environment variables: + +```console +$ export SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 +$ export SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback +$ export SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=fastapi_realm +$ export SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=fastapi_client +$ export SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=6x6gn8uJdWSBmP8FqbNRSoGdvaoaFeez +$ export SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email +$ export SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False +$ export SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider +... +``` diff --git a/mddocs/reference/server/configuration/broker.md b/mddocs/reference/server/configuration/broker.md new file mode 100644 index 00000000..f9af36a4 --- /dev/null +++ b/mddocs/reference/server/configuration/broker.md @@ -0,0 +1,3 @@ +# Broker settings { #server-configuration-broker } + +::: syncmaster.settings.broker.RabbitMQSettings diff --git a/mddocs/reference/server/configuration/cors.md b/mddocs/reference/server/configuration/cors.md new file mode 100644 index 00000000..c7d7b372 --- /dev/null +++ b/mddocs/reference/server/configuration/cors.md @@ -0,0 +1,5 @@ +# CORS settings { #server-configuration-cors } + +These settings used to control [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) options. 
+ +::: syncmaster.server.settings.server.cors.CORSSettings diff --git a/mddocs/reference/server/configuration/credentials.md b/mddocs/reference/server/configuration/credentials.md new file mode 100644 index 00000000..3d56f178 --- /dev/null +++ b/mddocs/reference/server/configuration/credentials.md @@ -0,0 +1,3 @@ +# Credentials settings { #server-configuration-credentials } + +::: syncmaster.settings.credentials.CredentialsEncryptionSettings diff --git a/mddocs/reference/server/configuration/database.md b/mddocs/reference/server/configuration/database.md new file mode 100644 index 00000000..0b8ed70f --- /dev/null +++ b/mddocs/reference/server/configuration/database.md @@ -0,0 +1,3 @@ +# Database settings { #reference-server-configuration-database } + +::: syncmaster.settings.database.DatabaseSettings diff --git a/mddocs/reference/server/configuration/debug.md b/mddocs/reference/server/configuration/debug.md new file mode 100644 index 00000000..41c92827 --- /dev/null +++ b/mddocs/reference/server/configuration/debug.md @@ -0,0 +1,121 @@ +# Enabling debug { #server-configuration-debug } + +## Return debug info in REST API responses + +By default, server does not add error details to response bodies, +to avoid exposing instance-specific information to end users. + +You can change this by setting: + +```console +$ export SYNCMASTER__SERVER__DEBUG=False +$ # start REST API server +$ curl -XPOST http://localhost:8000/failing/endpoint ... +{ + "error": { + "code": "unknown", + "message": "Got unhandled exception. Please contact support", + "details": null, + }, +} +``` + +```console +$ export SYNCMASTER__SERVER__DEBUG=True +$ # start REST API server +$ curl -XPOST http://localhost:8000/failing/endpoint ... 
+Traceback (most recent call last): +File ".../uvicorn/protocols/http/h11_impl.py", line 408, in run_asgi + result = await app( # type: ignore[func-returns-value] + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +File ".../site-packages/uvicorn/middleware/proxy_headers.py", line 84, in __call__ + return await self.app(scope, receive, send) +``` + +## DANGER + +This is only for development environment only. Do **NOT** use on production! + +## Print debug logs on backend + +See [Logging settings][server-configuration-logging], but replace log level `INFO` with `DEBUG`. + +## Fill up `X-Request-ID` header on backend + +Server can add `X-Request-ID` header to responses, which allows to match request on client with backend response. + +This is done by `request_id` middleware, which is enabled by default and can configured as described below: + +::: syncmaster.server.settings.server.request_id.RequestIDSettings + +## Print request ID to backend logs + +This is done by adding a specific filter to logging handler: + +### `logging.yml` + +``` +# development usage only +version: 1 +disable_existing_loggers: false + +filters: + # Add request ID as extra field named `correlation_id` to each log record. 
+ # This is used in combination with settings.server.request_id.enabled=True + # See https://github.com/snok/asgi-correlation-id#configure-logging + correlation_id: + (): asgi_correlation_id.CorrelationIdFilter + uuid_length: 32 + default_value: '-' + +formatters: + plain: + (): logging.Formatter + # Add correlation_id to log records + fmt: '%(asctime)s.%(msecs)03d %(processName)s:%(process)d %(name)s:%(lineno)d [%(levelname)s] %(correlation_id)s %(message)s' + datefmt: '%Y-%m-%d %H:%M:%S' + +handlers: + main: + class: logging.StreamHandler + formatter: plain + filters: [correlation_id] + stream: ext://sys.stdout + celery: + class: logging.StreamHandler + formatter: plain + filters: [correlation_id] + stream: ext://sys.stdout + +loggers: + '': + handlers: [main] + level: INFO + propagate: false + uvicorn: + handlers: [main] + level: INFO + propagate: false + celery: + level: INFO + handlers: [celery] + propagate: false + scheduler: + handlers: [main] + level: INFO + propagate: false + py4j: + handlers: [main] + level: WARNING + propagate: false + hdfs.client: + handlers: [main] + level: WARNING + propagate: false +``` + +Resulting logs look like: + +```text +2023-12-18 17:14:11.711 uvicorn.access:498 [INFO] 018c15e97a068ae09484f8c25e2799dd 127.0.0.1:34884 - "GET /monitoring/ping HTTP/1.1" 200 +``` diff --git a/mddocs/reference/server/configuration/index.md b/mddocs/reference/server/configuration/index.md new file mode 100644 index 00000000..92332085 --- /dev/null +++ b/mddocs/reference/server/configuration/index.md @@ -0,0 +1,17 @@ +# Configuration { #server-configuration } + +- [Database][reference-server-configuration-database] +- [Broker][server-configuration-broker] +- [Credentials][server-configuration-credentials] +- [Logging][server-configuration-logging] +- [Session][server-configuration-session] +- [Cors][server-configuration-cors] +- [Debug][server-configuration-debug] +- [Monitoring][server-configuration-monitoring] +- 
[Static_files][server-configuration-static-files] +- [Openapi][server-configuration-openapi] +- [Static_files][server-configuration-static-files] + +::: syncmaster.server.settings.ServerAppSettings + +::: syncmaster.server.settings.server.ServerSettings diff --git a/mddocs/reference/server/configuration/logging.md b/mddocs/reference/server/configuration/logging.md new file mode 100644 index 00000000..3d5fdaa3 --- /dev/null +++ b/mddocs/reference/server/configuration/logging.md @@ -0,0 +1,3 @@ +# Logging settings { #server-configuration-logging } + +::: syncmaster.settings.log.LoggingSettings diff --git a/mddocs/reference/server/configuration/monitoring.md b/mddocs/reference/server/configuration/monitoring.md new file mode 100644 index 00000000..e617d705 --- /dev/null +++ b/mddocs/reference/server/configuration/monitoring.md @@ -0,0 +1,9 @@ +# Server monitoring { #server-configuration-monitoring } + +REST API server provides the following endpoints with Prometheus compatible metrics: + +- `GET /monitoring/metrics` - server metrics, like number of requests per path and response status, CPU and RAM usage, and so on. + +These endpoints are enabled and configured using settings below: + +::: syncmaster.server.settings.server.monitoring.MonitoringSettings diff --git a/mddocs/reference/server/configuration/openapi.md b/mddocs/reference/server/configuration/openapi.md new file mode 100644 index 00000000..31236a26 --- /dev/null +++ b/mddocs/reference/server/configuration/openapi.md @@ -0,0 +1,13 @@ +# OpenAPI settings { #server-configuration-openapi } + +These settings used to control exposing OpenAPI.json and SwaggerUI/ReDoc endpoints. 
+ +::: syncmaster.server.settings.server.openapi.OpenAPISettings + +::: syncmaster.server.settings.server.openapi.SwaggerSettings + +::: syncmaster.server.settings.server.openapi.RedocSettings + +::: syncmaster.server.settings.server.openapi.LogoSettings + +::: syncmaster.server.settings.server.openapi.FaviconSettings diff --git a/mddocs/reference/server/configuration/session.md b/mddocs/reference/server/configuration/session.md new file mode 100644 index 00000000..b82b7a82 --- /dev/null +++ b/mddocs/reference/server/configuration/session.md @@ -0,0 +1,5 @@ +# Session settings { #server-configuration-session } + +These settings used to control [Session](https://developer.mozilla.org/en-US/docs/Web/HTTP/Session) options. + +::: syncmaster.server.settings.server.session.SessionSettings diff --git a/mddocs/reference/server/configuration/static_files.md b/mddocs/reference/server/configuration/static_files.md new file mode 100644 index 00000000..860b57a9 --- /dev/null +++ b/mddocs/reference/server/configuration/static_files.md @@ -0,0 +1,5 @@ +# Serving static files { #server-configuration-static-files } + +These settings used to control serving static files by a server. + +::: syncmaster.server.settings.server.static_files.StaticFilesSettings diff --git a/mddocs/reference/server/index.md b/mddocs/reference/server/index.md new file mode 100644 index 00000000..f705bbdf --- /dev/null +++ b/mddocs/reference/server/index.md @@ -0,0 +1,267 @@ +# REST API Server { #server } + +SyncMaster server provides simple REST API for accessing entities stored in [database][reference-server-configuration-database]. +Implemented using [FastAPI](https://fastapi.tiangolo.com/). + +## Install & run + +### With docker + +- Install [Docker](https://docs.docker.com/engine/install/) + +- Install [docker-compose](https://github.com/docker/compose/releases/) + +- Run the following command: + + ```console + $ docker compose --profile server up -d --wait + ... 
+ ``` + + `docker-compose` will download all necessary images, create containers, and then start the server. + + Options can be set via `.env` file or `environment` section in `docker-compose.yml` + +### `docker-compose.yml` + + ``` + services: + db: + image: postgres:17 + restart: unless-stopped + environment: + POSTGRES_DB: syncmaster + POSTGRES_USER: syncmaster + POSTGRES_PASSWORD: changeme + ports: + - 5432:5432 + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: pg_isready + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + db-migrations: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: no + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + entrypoint: [python, -m, syncmaster.db.migrations, upgrade, head] + env_file: .env.docker + depends_on: + db: + condition: service_healthy + + rabbitmq: + image: rabbitmq:4 + restart: unless-stopped + ports: + - 5672:5672 + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: rabbitmq-diagnostics -q ping + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + server: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.server + context: . 
+ target: prod + ports: + - 8000:8000 + environment: + # list here usernames which should be assigned SUPERUSER role on application start + SYNCMASTER__ENTRYPOINT__SUPERUSERS: admin + # PROMETHEUS_MULTIPROC_DIR is required for multiple workers, see: + # https://prometheus.github.io/client_python/multiprocess/ + PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-metrics + # tmpfs dir is cleaned up each container restart + tmpfs: + - /tmp/prometheus-metrics:mode=1777 + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - server + - frontend + - all + + worker: + image: mtsrus/syncmaster-worker:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.worker + context: . + target: prod + env_file: .env.docker + command: --loglevel=info -Q 123-test_queue # Queue.slug + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - worker + - all + + scheduler: + image: mtsrus/syncmaster-scheduler:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.scheduler + context: . 
+ target: prod + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - scheduler + - all + + frontend: + image: mtsrus/syncmaster-ui:${VERSION:-develop} + restart: unless-stopped + env_file: .env.docker + ports: + - 3000:3000 + depends_on: + server: + condition: service_healthy + profiles: + - frontend + - all + + volumes: + postgres_data: + rabbitmq_data: + ``` + +### `.env.docker` + + ``` + TZ=UTC + ENV=LOCAL + + # Logging options + SYNCMASTER__LOGGING__SETUP=True + SYNCMASTER__LOGGING__PRESET=colored + + # Common DB options + SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + + # Encrypt / Decrypt credentials data using this Fernet key. + # !!! GENERATE YOUR OWN COPY FOR PRODUCTION USAGE !!! + SYNCMASTER__ENCRYPTION__SECRET_KEY=UBgPTioFrtH2unlC4XFDiGf5sYfzbdSf_VgiUSaQc94= + + # Common RabbitMQ options + SYNCMASTER__BROKER__URL=amqp://guest:guest@rabbitmq:5672 + + # Server options + SYNCMASTER__SERVER__SESSION__SECRET_KEY=generate_some_random_string + # !!! NEVER USE ON PRODUCTION !!! 
+ SYNCMASTER__SERVER__DEBUG=true + + # Keycloak Auth + #SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider + SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 + SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=generated_by_keycloak + SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback + SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email + SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False + + # Dummy Auth + SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.dummy_provider.DummyAuthProvider + SYNCMASTER__AUTH__ACCESS_TOKEN__SECRET_KEY=generate_another_random_string + + # Scheduler options + SYNCMASTER__SCHEDULER__TRANSFER_FETCHING_TIMEOUT_SECONDS=200 + + # Worker options + SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://logs.location.example.com/syncmaster-worker?correlation_id=\{\{ correlation_id \}\}&run_id=\{\{ run.id \}\} + SYNCMASTER__HWM_STORE__ENABLED=true + SYNCMASTER__HWM_STORE__TYPE=horizon + SYNCMASTER__HWM_STORE__URL=http://horizon:8000 + SYNCMASTER__HWM_STORE__NAMESPACE=syncmaster_namespace + SYNCMASTER__HWM_STORE__USER=admin + SYNCMASTER__HWM_STORE__PASSWORD=123UsedForTestOnly@! + + # Frontend options + SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 + + # Cors + SYNCMASTER__SERVER__CORS__ENABLED=True + SYNCMASTER__SERVER__CORS__ALLOW_ORIGINS=["http://localhost:3000"] + SYNCMASTER__SERVER__CORS__ALLOW_CREDENTIALS=True + SYNCMASTER__SERVER__CORS__ALLOW_METHODS=["*"] + SYNCMASTER__SERVER__CORS__ALLOW_HEADERS=["*"] + SYNCMASTER__SERVER__CORS__EXPOSE_HEADERS=["X-Request-ID","Location","Access-Control-Allow-Credentials"] + ``` + +- After server is started and ready, open . 
+ +### Without docker + +- Install Python 3.11 or above + +- Setup [Relation Database][database], run migrations + +- Setup [Message Broker][message-broker] + +- Create virtual environment + + ```console + $ python -m venv /some/.venv + $ source /some/.venv/activate + ... + ``` + +- Install `syncmaster` package with following *extra* dependencies: + + ```console + $ pip install syncmaster[server] + ... + ``` + +- Run server process + + ```console + $ python -m syncmaster.server --host 0.0.0.0 --port 8000 + ... + ``` + + This is a thin wrapper around [uvicorn](https://www.uvicorn.org/#command-line-options) cli, + options and commands are just the same. + +- After server is started and ready, open . + +## See also + +- [Auth Providers][server-auth-providers] +- [Configuration][server-configuration] +- [CLI for managing superusers][manage-superusers-cli] +- [OpenAPI specification][server-openapi] diff --git a/mddocs/reference/server/manage_superusers_cli.md b/mddocs/reference/server/manage_superusers_cli.md new file mode 100644 index 00000000..7baf4a8f --- /dev/null +++ b/mddocs/reference/server/manage_superusers_cli.md @@ -0,0 +1,10 @@ +# CLI for managing superusers { #manage-superusers-cli } + +There are two ways to manage users: + +- automatic: + + Set `SYNCMASTER__ENTRYPOINT__SUPERUSERS=user1,user2`, and [REST API Server][server] Docker container entrypoint + will automatically set `is_superuser=True` flag for them, and reset for other users in database. 
+ +- manual via CLI: diff --git a/mddocs/reference/server/openapi.md b/mddocs/reference/server/openapi.md new file mode 100644 index 00000000..efcc7f02 --- /dev/null +++ b/mddocs/reference/server/openapi.md @@ -0,0 +1,5 @@ +# OpenAPI specification { #server-openapi } + +----8<---- +mddocs/_static/swagger.html +----8<---- diff --git a/mddocs/reference/worker/configuration/broker.md b/mddocs/reference/worker/configuration/broker.md new file mode 100644 index 00000000..a6a502ff --- /dev/null +++ b/mddocs/reference/worker/configuration/broker.md @@ -0,0 +1,3 @@ +# Broker settings { #worker-configuration-broker } + +::: syncmaster.settings.broker.RabbitMQSettings diff --git a/mddocs/reference/worker/configuration/credentials.md b/mddocs/reference/worker/configuration/credentials.md new file mode 100644 index 00000000..17718488 --- /dev/null +++ b/mddocs/reference/worker/configuration/credentials.md @@ -0,0 +1,3 @@ +# Credentials settings { #worker-configuration-credentials } + +::: syncmaster.settings.credentials.CredentialsEncryptionSettings diff --git a/mddocs/reference/worker/configuration/database.md b/mddocs/reference/worker/configuration/database.md new file mode 100644 index 00000000..55d687b1 --- /dev/null +++ b/mddocs/reference/worker/configuration/database.md @@ -0,0 +1,3 @@ +# Database settings { #worker-server-configuration-database } + +::: syncmaster.settings.database.DatabaseSettings diff --git a/mddocs/reference/worker/configuration/hwm_store.md b/mddocs/reference/worker/configuration/hwm_store.md new file mode 100644 index 00000000..2b88d4e8 --- /dev/null +++ b/mddocs/reference/worker/configuration/hwm_store.md @@ -0,0 +1,3 @@ +# HWM Store settings { #worker-configuration-hwm-store } + +::: syncmaster.worker.settings.hwm_store.HWMStoreSettings diff --git a/mddocs/reference/worker/configuration/index.md b/mddocs/reference/worker/configuration/index.md new file mode 100644 index 00000000..57ada418 --- /dev/null +++ 
b/mddocs/reference/worker/configuration/index.md @@ -0,0 +1,11 @@ +# Configuration { #worker-configuration } + +- [Database][worker-server-configuration-database] +- [Broker][worker-configuration-broker] +- [Credentials][worker-configuration-credentials] +- [Logging][worker-configuration-logging] +- [Hwm_store][worker-configuration-hwm-store] + +::: syncmaster.worker.settings.WorkerAppSettings + +::: syncmaster.worker.settings.WorkerSettings diff --git a/mddocs/reference/worker/configuration/logging.md b/mddocs/reference/worker/configuration/logging.md new file mode 100644 index 00000000..5ecbede5 --- /dev/null +++ b/mddocs/reference/worker/configuration/logging.md @@ -0,0 +1,3 @@ +# Logging settings { #worker-configuration-logging } + +::: syncmaster.settings.log.LoggingSettings diff --git a/mddocs/reference/worker/create_spark_session.md b/mddocs/reference/worker/create_spark_session.md new file mode 100644 index 00000000..8fc0537e --- /dev/null +++ b/mddocs/reference/worker/create_spark_session.md @@ -0,0 +1,32 @@ +# Altering Spark session settings { #worker-create-spark-session } + +SyncMaster Worker creates [SparkSession](https://spark.apache.org/docs/latest/sql-getting-started.html#starting-point-sparksession) for each Run. +By default, SparkSession is created with `master=local`, all required .jar packages for specific DB/FileSystem types, and limiter by transfer resources. 
+ +It is possible to alter SparkSession config by providing custom function: + +```bash +SYNCMASTER__WORKER__CREATE_SPARK_SESSION_FUNCTION=my_worker.spark.create_custom_spark_session +``` + +Here is a function example: + +```python +:caption: my_workers/spark.py + +from syncmaster.db.models import Run +from syncmaster.dto.connections import ConnectionDTO +from pyspark.sql import SparkSession + +def create_custom_spark_session( + run: Run, + source: ConnectionDTO, + target: ConnectionDTO, +) -> SparkSession: + # any custom code returning SparkSession object + return SparkSession.builde.config(...).getOrCreate() +``` + +Module with custom function should be placed in the same Docker image or Python virtual environment used by SyncMaster worker. + +> **For now, SyncMaster haven't been tested with `master=k8s` and `master=yarn`, so there can be some caveats.** diff --git a/mddocs/reference/worker/index.md b/mddocs/reference/worker/index.md new file mode 100644 index 00000000..9e28e24a --- /dev/null +++ b/mddocs/reference/worker/index.md @@ -0,0 +1,299 @@ +# Worker { #worker } + +SyncMaster worker is a dedicated process which receives new transfer Runs from [message-broker][message-broker], +executes them and updates status & log url in [database][database]. Implemented using [Celery](https://docs.celeryq.dev). + +## NOTE + +Each worker process is bound to one ot more Queues. You have to created it before starting a worker. +This can be done via [Frontend][frontend] or via [REST API Server][server] REST API. + +Queue field `slug` value is then should be passed to Celery argument `-Q`. +For example, for slug `123-test_queue` this should be `-Q 123-test_queue`. + +## Install & run + +### With docker + +- Install [Docker](https://docs.docker.com/engine/install/) + +- Install [docker-compose](https://github.com/docker/compose/releases/) + +- Go to `frontend ` + +- Create new Group + +- Create Queue in this group, and then get **Queue.slug** (e.g. 
`123-test_queue`) + +- Run the following command: + + ```console + $ docker compose --profile worker up -d --wait + ... + ``` + + `docker-compose` will download all necessary images, create containers, and then start the worker. + + Options can be set via `.env` file or `environment` section in `docker-compose.yml` + + ### `docker-compose.yml` + + ``` + services: + db: + image: postgres:17 + restart: unless-stopped + environment: + POSTGRES_DB: syncmaster + POSTGRES_USER: syncmaster + POSTGRES_PASSWORD: changeme + ports: + - 5432:5432 + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: pg_isready + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + db-migrations: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: no + build: + dockerfile: docker/Dockerfile.server + context: . + target: prod + entrypoint: [python, -m, syncmaster.db.migrations, upgrade, head] + env_file: .env.docker + depends_on: + db: + condition: service_healthy + + rabbitmq: + image: rabbitmq:4 + restart: unless-stopped + ports: + - 5672:5672 + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: rabbitmq-diagnostics -q ping + start_period: 5s + interval: 30s + timeout: 5s + retries: 3 + + server: + image: mtsrus/syncmaster-server:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.server + context: . 
+ target: prod + ports: + - 8000:8000 + environment: + # list here usernames which should be assigned SUPERUSER role on application start + SYNCMASTER__ENTRYPOINT__SUPERUSERS: admin + # PROMETHEUS_MULTIPROC_DIR is required for multiple workers, see: + # https://prometheus.github.io/client_python/multiprocess/ + PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-metrics + # tmpfs dir is cleaned up each container restart + tmpfs: + - /tmp/prometheus-metrics:mode=1777 + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - server + - frontend + - all + + worker: + image: mtsrus/syncmaster-worker:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.worker + context: . + target: prod + env_file: .env.docker + command: --loglevel=info -Q 123-test_queue # Queue.slug + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - worker + - all + + scheduler: + image: mtsrus/syncmaster-scheduler:${VERSION:-develop} + restart: unless-stopped + build: + dockerfile: docker/Dockerfile.scheduler + context: . 
+ target: prod + env_file: .env.docker + depends_on: + db: + condition: service_healthy + db-migrations: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + profiles: + - scheduler + - all + + frontend: + image: mtsrus/syncmaster-ui:${VERSION:-develop} + restart: unless-stopped + env_file: .env.docker + ports: + - 3000:3000 + depends_on: + server: + condition: service_healthy + profiles: + - frontend + - all + + volumes: + postgres_data: + rabbitmq_data: + ``` + + ### `.env.docker` + + ``` + TZ=UTC + ENV=LOCAL + + # Logging options + SYNCMASTER__LOGGING__SETUP=True + SYNCMASTER__LOGGING__PRESET=colored + + # Common DB options + SYNCMASTER__DATABASE__URL=postgresql+asyncpg://syncmaster:changeme@db:5432/syncmaster + + # Encrypt / Decrypt credentials data using this Fernet key. + # !!! GENERATE YOUR OWN COPY FOR PRODUCTION USAGE !!! + SYNCMASTER__ENCRYPTION__SECRET_KEY=UBgPTioFrtH2unlC4XFDiGf5sYfzbdSf_VgiUSaQc94= + + # Common RabbitMQ options + SYNCMASTER__BROKER__URL=amqp://guest:guest@rabbitmq:5672 + + # Server options + SYNCMASTER__SERVER__SESSION__SECRET_KEY=generate_some_random_string + # !!! NEVER USE ON PRODUCTION !!! 
+ SYNCMASTER__SERVER__DEBUG=true + + # Keycloak Auth + #SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.keycloak_provider.KeycloakAuthProvider + SYNCMASTER__AUTH__KEYCLOAK__SERVER_URL=http://keycloak:8080 + SYNCMASTER__AUTH__KEYCLOAK__REALM_NAME=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_ID=manually_created + SYNCMASTER__AUTH__KEYCLOAK__CLIENT_SECRET=generated_by_keycloak + SYNCMASTER__AUTH__KEYCLOAK__REDIRECT_URI=http://localhost:8000/auth/callback + SYNCMASTER__AUTH__KEYCLOAK__SCOPE=email + SYNCMASTER__AUTH__KEYCLOAK__VERIFY_SSL=False + + # Dummy Auth + SYNCMASTER__AUTH__PROVIDER=syncmaster.server.providers.auth.dummy_provider.DummyAuthProvider + SYNCMASTER__AUTH__ACCESS_TOKEN__SECRET_KEY=generate_another_random_string + + # Scheduler options + SYNCMASTER__SCHEDULER__TRANSFER_FETCHING_TIMEOUT_SECONDS=200 + + # Worker options + SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://logs.location.example.com/syncmaster-worker?correlation_id=\{\{ correlation_id \}\}&run_id=\{\{ run.id \}\} + SYNCMASTER__HWM_STORE__ENABLED=true + SYNCMASTER__HWM_STORE__TYPE=horizon + SYNCMASTER__HWM_STORE__URL=http://horizon:8000 + SYNCMASTER__HWM_STORE__NAMESPACE=syncmaster_namespace + SYNCMASTER__HWM_STORE__USER=admin + SYNCMASTER__HWM_STORE__PASSWORD=123UsedForTestOnly@! 
+ + # Frontend options + SYNCMASTER__UI__API_BROWSER_URL=http://localhost:8000 + + # Cors + SYNCMASTER__SERVER__CORS__ENABLED=True + SYNCMASTER__SERVER__CORS__ALLOW_ORIGINS=["http://localhost:3000"] + SYNCMASTER__SERVER__CORS__ALLOW_CREDENTIALS=True + SYNCMASTER__SERVER__CORS__ALLOW_METHODS=["*"] + SYNCMASTER__SERVER__CORS__ALLOW_HEADERS=["*"] + SYNCMASTER__SERVER__CORS__EXPOSE_HEADERS=["X-Request-ID","Location","Access-Control-Allow-Credentials"] + ``` + +### Without docker + +- Install Python 3.11 or above + +- Install Java 8 or above + + ```console + $ yum install java-1.8.0-openjdk-devel # CentOS 7 + $ dnf install java-11-openjdk-devel # CentOS 8 + $ apt-get install openjdk-11-jdk # Debian-based + ... + ``` + +- Setup [Relation Database][database], run migrations + +- Setup [Message Broker][message-broker] + +- Create virtual environment + + ```console + $ python -m venv /some/.venv + $ source /some/.venv/activate + ... + ``` + +- Install `syncmaster` package with following *extra* dependencies: + + ```console + $ pip install syncmaster[server,worker] + ... + ``` + +- Start [REST API Server][server] and [Frontend][frontend] + +- Create new Group + +- Create Queue in this group, and then get **Queue.slug** (e.g. `123-test_queue`) + +- Run worker process: + + ```console + $ python -m celery -A syncmaster.worker.celery worker -Q 123-test_queue --max-tasks-per-child=1 + ... + ``` + + You can specify options like concurrency and queues by adding additional flags: + + ```bash + $ python -m celery -A syncmaster.worker.celery worker -Q 123-test_queue --max-tasks-per-child=1 --concurrency=4 --loglevel=info + ... + ``` + + Refer to the [Celery](https://docs.celeryq.dev/en/stable/) documentation for more advanced start options. 
+ + > **`--max-tasks-per-child=1` flag is important!** + +## See also + +- [Configuration][worker-configuration] +- [Altering Spark session settings][worker-create-spark-session] +- [Setting the Run.log_url value][worker-log-url] diff --git a/mddocs/reference/worker/log_url.md b/mddocs/reference/worker/log_url.md new file mode 100644 index 00000000..8dd30a9c --- /dev/null +++ b/mddocs/reference/worker/log_url.md @@ -0,0 +1,13 @@ +# Setting the `Run.log_url` value { #worker-log-url } + +Each run in the system is linked to a log URL where the Celery worker logs are available. +This log URL might point to an Elastic instance or another logging tool such as Grafana. + +The log URL is generated based on a template configured in the configuration. +The configuration parameter is: + +```bash +SYNCMASTER__WORKER__LOG_URL_TEMPLATE=https://grafana.example.com?correlation_id=\{\{correlation_id\}\}&run_id=\{\{run.id\}\} +``` + +In this example, run logs can be retrieved by either its correlation id `x-request-id` in http headers, or by `Run.Id` field value. diff --git a/mddocs/robots.txt b/mddocs/robots.txt new file mode 100644 index 00000000..029a5351 --- /dev/null +++ b/mddocs/robots.txt @@ -0,0 +1,5 @@ +User-agent: * +Allow: /*/stable/ +Allow: /en/stable/ # Fallback for bots that don't understand wildcards +Disallow: / +Sitemap: https://data-syncmaster.readthedocs.io/sitemap.xml \ No newline at end of file diff --git a/mddocs/security.md b/mddocs/security.md new file mode 100644 index 00000000..58741e94 --- /dev/null +++ b/mddocs/security.md @@ -0,0 +1,25 @@ +# Security + +## Supported Python versions + +3.11 or above + +## Product development security recommendations + +1. Update dependencies to last stable version +2. Build SBOM for the project +3. Perform SAST (Static Application Security Testing) where possible + +## Product development security requirements + +1. No binaries in repository +2. No passwords, keys, access tokens in source code +3. 
No “Critical” and/or “High” vulnerabilities in contributed source code + +## Vulnerability reports + +Please, use email [mailto:onetools@mts.ru](mailto:onetools@mts.ru) for reporting security issues or anything that can cause any consequences for security. + +Please avoid any public disclosure (including registering issues) at least until it is fixed. + +Thank you in advance for understanding.