diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml
new file mode 100644
index 0000000..90b6d79
--- /dev/null
+++ b/.github/workflows/deploy-docs.yml
@@ -0,0 +1,53 @@
+name: Deploy docs to Pages
+
+on:
+  # Runs on pushes targeting the default branch
+  push:
+    branches: ["main"]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install ".[docs]"
+      - name: Build
+        run: |
+          make html
+        working-directory:
+          ./docs
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v4
+        with:
+          path: './docs/build/html/'
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 90b6d79..47f1b9d 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -1,30 +1,11 @@
-name: Deploy docs to Pages
+name: Verify docs build

 on:
-  # Runs on pushes targeting the default branch
-  push:
-    branches: ["main"]
-
-  # Allows you to run this workflow manually from the Actions tab
+  pull_request:
   workflow_dispatch:

-# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
-permissions:
-  contents: read
-  pages: write
-  id-token: write
-
-# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
-# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
-concurrency:
-  group: "pages"
-  cancel-in-progress: false
-
 jobs:
-  deploy:
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
+  build-docs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -42,12 +23,3 @@ jobs:
       - name: Build
         run: |
           make html
         working-directory:
           ./docs
-      - name: Setup Pages
-        uses: actions/configure-pages@v5
-      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v4
-        with:
-          path: './docs/build/html/'
-      - name: Deploy to GitHub Pages
-        id: deployment
-        uses: actions/deploy-pages@v4
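Both workflows run the same three build steps, so a failing docs job can be reproduced locally before pushing. A minimal sketch of the equivalent commands, assuming (as the workflow steps imply) that the package defines a ``docs`` extra and that ``docs/`` contains a standard Sphinx Makefile:

   # Mirror the CI steps: upgrade pip, install the docs dependencies, build HTML.
   python -m pip install --upgrade pip
   pip install ".[docs]"
   make -C docs html   # output lands in docs/build/html/, the path the Pages artifact uploads
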
diff --git a/docs/source/api/data_collections_api.rst b/docs/source/api/data_collections_api.rst
deleted file mode 100644
index 1572033..0000000
--- a/docs/source/api/data_collections_api.rst
+++ /dev/null
@@ -1,53 +0,0 @@
-data\_collections\_api package
-==============================
-
-Subpackages
------------
-
-.. toctree::
-   :maxdepth: 4
-
-   data_collections_api.cli
-
-Submodules
-----------
-
-data\_collections\_api.dumpers module
--------------------------------------
-
-.. automodule:: data_collections_api.dumpers
-   :members:
-   :show-inheritance:
-   :undoc-members:
-
-data\_collections\_api.invenio module
--------------------------------------
-
-.. automodule:: data_collections_api.invenio
-   :members:
-   :show-inheritance:
-   :undoc-members:
-
-data\_collections\_api.metadata module
---------------------------------------
-
-.. automodule:: data_collections_api.metadata
-   :members:
-   :show-inheritance:
-   :undoc-members:
-
-data\_collections\_api.schema module
-------------------------------------
-
-.. automodule:: data_collections_api.schema
-   :members:
-   :show-inheritance:
-   :undoc-members:
-
-Module contents
----------------
-
-.. automodule:: data_collections_api
-   :members:
-   :show-inheritance:
-   :undoc-members:
diff --git a/docs/source/api/modules.rst b/docs/source/api/modules.rst
deleted file mode 100644
index 19f1e46..0000000
--- a/docs/source/api/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-API Documentation
-=================
-
-.. toctree::
-   :maxdepth: 4
-
-   data_collections_api
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
index 4f77bc0..4e4b11a 100644
--- a/docs/source/cli.rst
+++ b/docs/source/cli.rst
@@ -1,32 +1,106 @@
 CLI Usage
 =========

-``data_collections_api`` provides a few commandline tools for
-simplifying the process of uploading or verifying data.
+``data_collections_api`` provides a few command-line tools for simplifying the process of uploading
+or verifying data and metadata.

 data_collections
 ----------------

-``data_collections`` is the general top-level interface to the
-tools. These tools are implemented as sub-parsers within the main
-module.
+``data_collections`` is the general top-level interface to the tools. These tools are implemented as
+sub-parsers within the main module.
+
+.. admonition:: Running ``data_collections``
+
+   By default, if the ``data_collections_api`` package is installed, ``data_collections`` is
+   installed as an executable script on your main ``PATH``. In general, this is the main entry
+   point.
+
+   If that is not desired, it is possible to run ``data_collections`` through the Python module
+   system::
+
+      python -m data_collections_api
+
+   where the ``data_collections_api`` **module** (folder) is on the current ``sys.path`` (by being
+   installed, on the current ``PYTHONPATH``, or in the current working directory)::
+
+      PYTHONPATH=/path/containing/data_collections_api python -m data_collections_api
+
+   Throughout the rest of this page, we will assume ``data_collections`` is used as the main entry
+   point.

 upload
 ******

-Construct a set of data and upload a set of files along with the metadata to an
-Invenio repository.
+   Construct a set of data and upload a set of files along with the metadata to an Invenio repository.
+
+``data_collections_api`` can take your data and metadata and automatically upload them to the Invenio
+repository. To do so, you need to have some information at hand:
+
+- The URL of the repository you wish to upload the data to. In the case of PSDI data, this will
+  often be https://data-collections.psdi.ac.uk.
+- Your API key (also called a Personal Access Token or PAT; see `pat_guide`_ for how to create
+  one) for the repository, granting permission to write and upload data.
+- A metadata file detailing the data relating to the files (see `template`_).
+- The files ready to upload.
+
+With all this prepared, uploading the data is as simple as:
+
+::
+
+   data_collections upload --api-url https://data-collections.psdi.ac.uk --api-key 1234567890abcdef --metadata-path /path/to/metadata_file.yaml --files FILE1 FILE2 --community my_community
+
+.. note::
+
+   Since this is a common operation, it is also available as the standalone ``upload_record`` script.

 validate
 ********

-Validate the metadata file for a dataset before uploading.
+   Validate the metadata file for a dataset before uploading.
+
+``data_collections_api`` can validate your metadata file against the schema to verify that the
+contents of the file match what is required to make a valid upload.
+
+.. note::
+
+   The validator does not verify most data itself; you must ensure that all entries are spelled and
+   written correctly.
+
+To validate a metadata file, simply run:
+
+::
+
+   data_collections validate [file]
+
+e.g.
+
+::
+
+   data_collections validate examples/biosim_record.yaml
+
+The file can be in either ``JSON`` or ``YAML`` format. ``validate`` will attempt to determine the
+appropriate format from the file extension, but this can be specified explicitly with the ``-f``
+flag.
+
+::
+
+   data_collections validate -f json examples/biosim_record.yaml
+
+.. note::
+
+   The above will raise an error since the file is not in ``json`` format.
+
+template
+********
+
+   Dump a template metadata file ready for modification to upload.

-dump
-****
+``data_collections_api`` provides a way to quick-start building metadata: ``template`` dumps an
+example metadata file for a particular community and data type (though currently only a basic
+example is available). To do so, simply run::

-Dump a template metadata file ready for modification to upload.
+   data_collections template my_metadata

-upload_record
--------------
+You can then edit and modify this template to fill in the data needed.
+
+.. _pat_guide: ...
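Taken together, the sub-commands documented above form a natural pipeline: dump a template, fill it in, validate it, then upload. A sketch stringing together only the commands and flags shown in cli.rst; the file names, API key, and community are placeholders, and the ``.yaml`` extension of the dumped template is an assumption:

   # 1. Dump a starter metadata file, then edit it by hand.
   data_collections template my_metadata
   # 2. Validate the edited file against the schema (format inferred from the extension).
   data_collections validate my_metadata.yaml
   # 3. Upload the files together with the validated metadata to the Invenio repository.
   data_collections upload --api-url https://data-collections.psdi.ac.uk \
       --api-key 1234567890abcdef \
       --metadata-path my_metadata.yaml \
       --files FILE1 FILE2 \
       --community my_community
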
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 829180d..0388379 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -23,6 +23,7 @@
 extensions = [
     "numpydoc",
+    "sphinx.ext.apidoc",
     "sphinx.ext.autodoc",
     "sphinx.ext.autosummary",
     "sphinx.ext.intersphinx",
@@ -31,6 +32,10 @@
     "sphinxcontrib.contentui",
 ]

+apidoc_modules = [
+    {"path": "../../data_collections_api", "destination": "./api"},
+]
+
 always_use_bars_union = True
 napoleon_include_special_with_doc = True
 napoleon_use_param = True
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 766a609..f40822b 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -14,4 +14,4 @@ Project to allow simplified editing and construction of Invenio data for the PSD
    :caption: Contents:

    cli
-   api/modules
+   API Documentation <api/modules>
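Deleting the hand-written ``api/`` pages is safe here because ``sphinx.ext.apidoc`` regenerates them under ``docs/source/api`` at build time, which is what the adjusted toctree entry points at. A quick local check, assuming the extension mirrors the classic ``sphinx-apidoc`` output layout (the exact generated file names are an assumption):

   # Build the docs, then confirm the API pages were regenerated.
   make -C docs html
   ls docs/source/api
   # expected to contain files such as modules.rst and data_collections_api.rst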