From 8719a59701aef0bef8a1a424cbfc6a40002db4b6 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Thu, 18 Sep 2025 18:21:30 -0400 Subject: [PATCH 01/12] Add mkdocs dependencies, configs and placeholders --- .../sections/user-guide/notebook-examples.md | 6 + docs/mkdocs/sections/user-guide/user-guide.md | 0 mkdocs.yml | 43 +++ poetry.lock | 266 +++++++++++++++++- pyproject.toml | 3 + 5 files changed, 314 insertions(+), 4 deletions(-) create mode 100644 docs/mkdocs/sections/user-guide/notebook-examples.md create mode 100644 docs/mkdocs/sections/user-guide/user-guide.md create mode 100644 mkdocs.yml diff --git a/docs/mkdocs/sections/user-guide/notebook-examples.md b/docs/mkdocs/sections/user-guide/notebook-examples.md new file mode 100644 index 0000000..bd2b320 --- /dev/null +++ b/docs/mkdocs/sections/user-guide/notebook-examples.md @@ -0,0 +1,6 @@ +# Notebook Examples + +There are a few notebook examples available. + +- [How to use STAC API](https://github.com/RolnickLab/geospatial-tools/blob/main/notebooks/stac_api_tools.ipynb) +- [Exploring Sentinel 2 data from Planetary Computer](https://github.com/RolnickLab/geospatial-tools/blob/main/notebooks/planetary_computer_sentinel2_exploration.ipynb) \ No newline at end of file diff --git a/docs/mkdocs/sections/user-guide/user-guide.md b/docs/mkdocs/sections/user-guide/user-guide.md new file mode 100644 index 0000000..e69de29 diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..036851b --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,43 @@ +site_name: Geospatial Tools + +docs_dir: docs/mkdocs + +nav: + - User Guide: + - sections/user-guide/user-guide.md + - sections/user-guide/notebook-examples.md + + - Developer Guide: + - sections/dev-guide/developer-guide.md + +theme: + name: material + palette: + scheme: slate + + +markdown_extensions: + - admonition + +extra_css: + - css/extra.css + +plugins: + - search + - mkdocstrings: + default_handler: python + handlers: + python: + paths: [geospatial_tools] + options: + docstring_style: google + members_order: source + annotations_path: brief + show_docstring_attributes: true + modernize_annotations: true + show_source: true + show_submodules: false + separate_signature: true + signature_crossrefs: true + show_signature_annotations: true + allow_inspection: false diff --git a/poetry.lock b/poetry.lock index b4182f8..40ea8c2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -714,7 +714,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\"", lab = "sys_platform == \"win32\""} +markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\"", lab = "sys_platform == \"win32\""} [[package]] name = "colorlog" @@ -1465,6 +1465,39 @@ shapely = ">=2.0.0" all = ["GeoAlchemy2", "SQLAlchemy (>=1.3)", "folium", "geopy", "mapclassify", "matplotlib (>=3.5.0)", "psycopg-binary (>=3.1.0)", "pyarrow (>=8.0.0)", "xyzservices"] dev = ["black", "codecov", "pre-commit", "pytest (>=3.1.0)", "pytest-cov", "pytest-xdist"] +[[package]] +name = "ghp-import" +version = "2.1.0" +description = "Copy your docs directly to the gh-pages branch." 
+optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, + {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1" + +[package.extras] +dev = ["flake8", "markdown", "twine", "wheel"] + +[[package]] +name = "griffe" +version = "1.14.0" +description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "griffe-1.14.0-py3-none-any.whl", hash = "sha256:0e9d52832cccf0f7188cfe585ba962d2674b241c01916d780925df34873bceb0"}, + {file = "griffe-1.14.0.tar.gz", hash = "sha256:9d2a15c1eca966d68e00517de5d69dd1bc5c9f2335ef6c1775362ba5b8651a13"}, +] + +[package.dependencies] +colorama = ">=0.4" + [[package]] name = "h11" version = "0.16.0" @@ -1810,7 +1843,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" -groups = ["main", "lab", "secondary"] +groups = ["main", "dev", "lab", "secondary"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -2297,6 +2330,22 @@ files = [ {file = "locket-1.0.0.tar.gz", hash = "sha256:5c0d4c052a8bbbf750e056a8e65ccd309086f4f0f18a2eac306a8dfa4112a632"}, ] +[[package]] +name = "markdown" +version = "3.9" +description = "Python implementation of John Gruber's Markdown." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "markdown-3.9-py3-none-any.whl", hash = "sha256:9f4d91ed810864ea88a6f32c07ba8bee1346c0cc1f6b1f9f6c822f2a9667d280"}, + {file = "markdown-3.9.tar.gz", hash = "sha256:d2900fe1782bd33bdbbd56859defef70c2e78fc46668f8eb9df3128138f2cb6a"}, +] + +[package.extras] +docs = ["mdx_gh_links (>=0.2)", "mkdocs (>=1.6)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -2328,7 +2377,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" -groups = ["main", "lab", "secondary"] +groups = ["main", "dev", "lab", "secondary"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -2582,6 +2631,18 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "mergedeep" +version = "1.3.4" +description = "A deep merge function for 🐍." 
+optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, + {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, +] + [[package]] name = "mistune" version = "3.1.3" @@ -2597,6 +2658,114 @@ files = [ [package.dependencies] typing-extensions = {version = "*", markers = "python_version < \"3.11\""} +[[package]] +name = "mkdocs" +version = "1.6.1" +description = "Project documentation with Markdown." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, + {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} +ghp-import = ">=1.0" +jinja2 = ">=2.11.1" +markdown = ">=3.3.6" +markupsafe = ">=2.0.1" +mergedeep = ">=1.3.4" +mkdocs-get-deps = ">=0.2.0" +packaging = ">=20.5" +pathspec = ">=0.11.1" +pyyaml = ">=5.1" +pyyaml-env-tag = ">=0.1" +watchdog = ">=2.0" + +[package.extras] +i18n = ["babel (>=2.9.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] + +[[package]] +name = "mkdocs-autorefs" +version = "1.4.3" +description = "Automatically link across pages in MkDocs." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocs_autorefs-1.4.3-py3-none-any.whl", hash = "sha256:469d85eb3114801d08e9cc55d102b3ba65917a869b893403b8987b601cf55dc9"}, + {file = "mkdocs_autorefs-1.4.3.tar.gz", hash = "sha256:beee715b254455c4aa93b6ef3c67579c399ca092259cc41b7d9342573ff1fc75"}, +] + +[package.dependencies] +Markdown = ">=3.3" +markupsafe = ">=2.0.1" +mkdocs = ">=1.1" + +[[package]] +name = "mkdocs-get-deps" +version = "0.2.0" +description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"}, + {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"}, +] + +[package.dependencies] +mergedeep = ">=1.3.4" +platformdirs = ">=2.2.0" +pyyaml = ">=5.1" + +[[package]] +name = "mkdocstrings" +version = "0.30.0" +description = "Automatic documentation from sources, for MkDocs." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocstrings-0.30.0-py3-none-any.whl", hash = "sha256:ae9e4a0d8c1789697ac776f2e034e2ddd71054ae1cf2c2bb1433ccfd07c226f2"}, + {file = "mkdocstrings-0.30.0.tar.gz", hash = "sha256:5d8019b9c31ddacd780b6784ffcdd6f21c408f34c0bd1103b5351d609d5b4444"}, +] + +[package.dependencies] +Jinja2 = ">=2.11.1" +Markdown = ">=3.6" +MarkupSafe = ">=1.1" +mkdocs = ">=1.6" +mkdocs-autorefs = ">=1.4" +pymdown-extensions = ">=6.3" + +[package.extras] +crystal = ["mkdocstrings-crystal (>=0.3.4)"] +python = ["mkdocstrings-python (>=1.16.2)"] +python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] + +[[package]] +name = "mkdocstrings-python" +version = "1.18.2" +description = "A Python handler for mkdocstrings." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocstrings_python-1.18.2-py3-none-any.whl", hash = "sha256:944fe6deb8f08f33fa936d538233c4036e9f53e840994f6146e8e94eb71b600d"}, + {file = "mkdocstrings_python-1.18.2.tar.gz", hash = "sha256:4ad536920a07b6336f50d4c6d5603316fafb1172c5c882370cbbc954770ad323"}, +] + +[package.dependencies] +griffe = ">=1.13" +mkdocs-autorefs = ">=1.4" +mkdocstrings = ">=0.30" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3760,6 +3929,37 @@ tomlkit = ">=0.10.1" spelling = ["pyenchant (>=3.2,<4.0)"] testutils = ["gitpython (>3)"] +[[package]] +name = "pymdown-extensions" +version = "10.16.1" +description = "Extension pack for Python Markdown." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pymdown_extensions-10.16.1-py3-none-any.whl", hash = "sha256:d6ba157a6c03146a7fb122b2b9a121300056384eafeec9c9f9e584adfdb2a32d"}, + {file = "pymdown_extensions-10.16.1.tar.gz", hash = "sha256:aace82bcccba3efc03e25d584e6a22d27a8e17caa3f4dd9f207e49b787aa9a91"}, +] + +[package.dependencies] +markdown = ">=3.6" +pyyaml = "*" + +[package.extras] +extra = ["pygments (>=2.19.1)"] + +[[package]] +name = "pyment" +version = "0.3.3" +description = "Generate/convert automatically the docstrings from code signature" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "Pyment-0.3.3-py2.py3-none-any.whl", hash = "sha256:a0c6ec59d06d24aeec3eaecb22115d0dc95d09e14209b2df838381fdf47a78cc"}, + {file = "Pyment-0.3.3.tar.gz", hash = "sha256:951a4c52d6791ccec55bc739811169eed69917d3874f5fe722866623a697f39d"}, +] + [[package]] name = "pyogrio" version = "0.8.0" @@ -4160,6 +4360,21 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "pyyaml-env-tag" +version = "1.1" +description = "A custom YAML tag for referencing environment variables in YAML files." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04"}, + {file = "pyyaml_env_tag-1.1.tar.gz", hash = "sha256:2eb38b75a2d21ee0475d6d97ec19c63287a7e140231e4214969d0eac923cd7ff"}, +] + +[package.dependencies] +pyyaml = "*" + [[package]] name = "pyzmq" version = "26.4.0" @@ -5112,6 +5327,49 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +[[package]] +name = "watchdog" +version = "6.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e6f0e77c9417e7cd62af82529b10563db3423625c5fce018430b249bf977f9e8"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90c8e78f3b94014f7aaae121e6b909674df5b46ec24d6bebc45c44c56729af2a"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11"}, + {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7a0e56874cfbc4b9b05c60c8a1926fedf56324bb08cfbc188969777940aef3aa"}, + {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6439e374fc012255b4ec786ae3c4bc838cd7309a540e5fe0952d03687d8804e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2"}, + {file = "watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a"}, + {file = "watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680"}, + {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"}, + {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [[package]] name = "wcwidth" version = "0.2.13" @@ -5272,4 +5530,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "97d0142190a0e2b17e30b0bdc814f5f08eee37af4b8e5b441032a2a09b3260f0" +content-hash = "f6dab5d9fbfcc1e7f3f109218c5a20aa9eb487feade9079d733466e8a588a639" diff --git a/pyproject.toml b/pyproject.toml index d681c05..f15fc1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,9 @@ ruff = "^0.11.10" mdformat = "^0.7.22" mdformat-gfm = "^0.4.1" mdformat-gfm-alerts = "^2.0.0" +mkdocs = "^1.6.1" +mkdocstrings-python = "^1.18.2" +pyment = "^0.3.3" [tool.poetry.group.lab.dependencies] jupyterlab = "^4.0.10" From 06ba8fe75330c6d58788b86a179ae190e41f6d08 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Thu, 18 Sep 2025 18:22:16 -0400 Subject: [PATCH 02/12] Convert stac.py's docstring to google format to try out mkdocstrings --- .../sections/dev-guide/developer-guide.md | 2 + geospatial_tools/stac.py | 312 +++++++++++------- 2 files changed, 196 insertions(+), 118 deletions(-) create mode 100644 docs/mkdocs/sections/dev-guide/developer-guide.md diff --git a/docs/mkdocs/sections/dev-guide/developer-guide.md b/docs/mkdocs/sections/dev-guide/developer-guide.md new file mode 100644 index 0000000..8291c26 --- 
/dev/null +++ b/docs/mkdocs/sections/dev-guide/developer-guide.md @@ -0,0 +1,2 @@ +# Stac +::: geospatial_tools.stac diff --git a/geospatial_tools/stac.py b/geospatial_tools/stac.py index 75c77c3..a9cb7bb 100644 --- a/geospatial_tools/stac.py +++ b/geospatial_tools/stac.py @@ -33,9 +33,12 @@ def create_planetary_computer_catalog(max_retries=3, delay=5, logger=LOGGER) -> """ Creates a Planetary Computer Catalog Client. - Returns - ------- - Planetary computer catalog client + Args: + max_retries: (Default value = 3) + delay: (Default value = 5) + logger: (Default value = LOGGER) + + Returns: """ for attempt in range(1, max_retries + 1): try: @@ -53,6 +56,15 @@ def create_planetary_computer_catalog(max_retries=3, delay=5, logger=LOGGER) -> def catalog_generator(catalog_name, logger=LOGGER) -> pystac_client.Client | None: + """ + + Args: + catalog_name: + logger: (Default value = LOGGER) + + Returns: + STAC Client + """ catalog_dict = {PLANETARY_COMPUTER: create_planetary_computer_catalog} if catalog_name not in catalog_dict: logger.error(f"Unsupported catalog name: {catalog_name}") @@ -64,11 +76,22 @@ def catalog_generator(catalog_name, logger=LOGGER) -> pystac_client.Client | Non def list_available_catalogs(logger: logging.Logger = LOGGER) -> frozenset[str]: + """ + + Args: + logger: logging.Logger: (Default value = LOGGER) + + Returns: + + + """ logger.info("Available catalogs") return CATALOG_NAME_LIST class AssetSubItem: + """""" + def __init__(self, asset, item_id: str, band: str, filename: str | pathlib.Path): if isinstance(filename, str): filename = pathlib.Path(filename) @@ -79,6 +102,8 @@ def __init__(self, asset, item_id: str, band: str, filename: str | pathlib.Path) class Asset: + """""" + def __init__( self, asset_id: str, @@ -96,11 +121,21 @@ def __init__( self.logger = logger def add_asset_item(self, asset: AssetSubItem): + """ + + Args: + asset: AssetSubItem: + + Returns: + + + """ if not self.list: self.list = [] self.list.append(asset) def show_asset_items(self): + """Show items that belong to this asset.""" asset_list = [] for asset_sub_item in self.list: asset_list.append( @@ -111,6 +146,16 @@ def show_asset_items(self): def merge_asset( self, base_directory: str | pathlib.Path | None = None, delete_sub_items: bool = False ) -> pathlib.Path | None: + """ + + Args: + base_directory: str | pathlib.Path | None: (Default value = None) + delete_sub_items: bool: (Default value = False) + + Returns: + + + """ if not base_directory: base_directory = "" if isinstance(base_directory, str): @@ -145,6 +190,17 @@ def reproject_merged_asset( base_directory: str | pathlib.Path = None, delete_merged_asset: bool = False, ): + """ + + Args: + target_projection: str | int: + base_directory: str | pathlib.Path: (Default value = None) + delete_merged_asset: bool: (Default value = False) + + Returns: + + + """ if not base_directory: base_directory = "" if isinstance(base_directory, str): @@ -167,6 +223,7 @@ def reproject_merged_asset( return None def delete_asset_sub_items(self): + """""" self.logger.info(f"Deleting asset sub items from asset [{self.asset_id}]") if self.list: for item in self.list: @@ -174,20 +231,24 @@ def delete_asset_sub_items(self): item.filename.unlink() def delete_merged_asset(self): + """""" self.logger.info(f"Deleting merged asset file for [{self.merged_asset_path}]") self.merged_asset_path.unlink() def delete_reprojected_asset(self): + """""" self.logger.info(f"Deleting reprojected asset file for [{self.reprojected_asset_path}]") self.reprojected_asset_path.unlink() 
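# --- Hedged usage sketch (illustrative only, not part of this patch) ---
# The merge/reproject/delete helpers above are designed to chain: merge the
# per-band downloads into one raster, reproject it, then drop intermediates.
# Assumes `asset` is an Asset produced by StacSearch's download helpers, and
# "EPSG:5070" plus the "data" directory are arbitrary example values.
#
#     merged = asset.merge_asset(base_directory="data", delete_sub_items=True)
#     if merged is not None:
#         asset.reproject_merged_asset(
#             target_projection="EPSG:5070",
#             base_directory="data",
#             delete_merged_asset=True,
#         )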
def _create_merged_asset_metadata(self): + """""" self.logger.info("Creating merged asset metadata") file_list = [asset.filename for asset in self.list] meta = create_merged_raster_bands_metadata(file_list) return meta def _get_asset_total_bands(self): + """""" downloaded_file_list = [asset.filename for asset in self.list] total_band_count = get_total_band_count(downloaded_file_list) return total_band_count @@ -225,10 +286,8 @@ def search( Parameter descriptions taken from pystac docs. - Parameters - ---------- - date_range - Either a single datetime or datetime range used to filter results. You may express a single datetime + Args: + date_range: Either a single datetime or datetime range used to filter results. You may express a single datetime using a datetime. datetime instance, a RFC 3339-compliant timestamp, or a simple date string (see below). Timezone unaware instances are assumed to represent UTC timestamps. You may represent a datetime range using a "/" separated string as described @@ -236,41 +295,49 @@ def search( use either ".." ('2020-01-01:00:00:00Z/..', ['2020-01-01:00:00:00Z', '..']) or a value of None (['2020-01-01:00:00:00Z', None]). If using a simple date string, the datetime can be specified in YYYY-mm-dd format, optionally truncating to YYYY-mm or just YYYY. - Simple date strings will be expanded to include the entire time period, for example: - * 2017 expands to 2017-01-01T00:00:00Z/ 2017-12-31T23:59:59Z - * 2017-06 expands to 2017-06-01T00:00:00Z/ 2017-06-30T23:59:59Z - * 2017-06-10 expands to 2017-06-10T00:00:00Z/ 2017-06-10T23:59:59Z + + * 2017 expands to + 2017-01-01T00:00:00Z/ 2017-12-31T23:59:59Z + * 2017-06 expands to + 2017-06-01T00:00:00Z/ 2017-06-30T23:59:59Z + * 2017-06-10 expands to + 2017-06-10T00:00:00Z/ 2017-06-10T23:59:59Z If used in a range, the end of the range expands to the end of that day/ month/ year, for example: - * 2017/ 2018 expands to 2017-01-01T00:00:00Z/ 2018-12-31T23:59:59Z - * 2017-06/ 2017-07 expands to 2017-06-01T00:00:00Z/ 2017-07-31T23:59:59Z - * 2017-06-10/ 2017-06-11 expands to 2017-06-10T00:00:00Z/ 2017-06-11T23:59:59Z - max_items - The maximum number of items to return from the search, even if there are + + * 2017/ 2018 expands to + 2017-01-01T00:00:00Z/ 2018-12-31T23:59:59Z + * 2017-06/ 2017-07 expands to + 2017-06-01T00:00:00Z/ 2017-07-31T23:59:59Z + * 2017-06-10/ 2017-06-11 expands to + 2017-06-10T00:00:00Z/ 2017-06-11T23:59:59Z + (Default value = None) + max_items: The maximum number of items to return from the search, even if there are more matching results. - limit - A recommendation to the service as to the number of items to return per + limit: A recommendation to the service as to the number of items to return per page of results. - ids - List of one or more Item ids to filter on. - collections - List of one or more Collection IDs or pystac. Collection instances. Only Items in one of the provided + ids: List of one or more Item ids to filter on. + collections: List of one or more Collection IDs or pystac. Collection instances. Only Items in one of the provided Collections will be searched - bbox - A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be filtered + bbox: A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be filtered to only those intersecting the bounding box. 
- intersects - A string or dictionary representing a GeoJSON geometry, or an object that implements a __geo_interface__ + intersects: A string or dictionary representing a GeoJSON geometry, or an object that implements a __geo_interface__ property, as supported by several libraries including Shapely, ArcPy, PySAL, and geojson. Results filtered to only those intersecting the geometry. - query - List or JSON of query parameters as per the STAC API query extension. - sortby - A single field or list of fields to sort the response by - - Returns - ------- - An item list of search results. + query: List or JSON of query parameters as per the STAC API query extension. + sortby: A single field or list of fields to sort the response by + max_items: int | None: (Default value = None) + limit: int | None: (Default value = None) + ids: list | None: (Default value = None) + collections: str | list | None: (Default value = None) + bbox: geotools_types.BBoxLike | None: (Default value = None) + intersects: geotools_types.IntersectsLike | None: (Default value = None) + query: dict | None: (Default value = None) + sortby: list | dict | None: (Default value = None) + max_retries: (Default value = 3) + delay: (Default value = 5) + + Returns: """ if isinstance(collections, str): collections = [collections] @@ -329,34 +396,31 @@ def search_for_date_ranges( Parameter descriptions taken from pystac docs. - Parameters - ---------- - date_ranges - List containing datetime date ranges - max_items - The maximum number of items to return from the search, even if there are - more matching results. - limit - A recommendation to the service as to the number of items to return per - page of results. - collections - List of one or more Collection IDs or pystac. Collection instances. Only Items in one of the provided - Collections will be searched - bbox - A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be filtered - to only those intersecting the bounding box. - intersects - A string or dictionary representing a GeoJSON geometry, or an object that implements a __geo_interface__ - property, as supported by several libraries including Shapely, ArcPy, PySAL, and geojson. Results - filtered to only those intersecting the geometry. - query - List or JSON of query parameters as per the STAC API query extension. - sortby - A single field or list of fields to sort the response by - - Returns - ------- - An item list of search results. + Args: + date_ranges: List containing datetime date ranges + max_items: The maximum number of items to return from the search, even if there are more matching results + limit: A recommendation to the service as to the number of items to return per page of results. + collections: List of one or more Collection IDs or pystac. Collection instances. Only Items in one of the + provided Collections will be searched + bbox: A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be + filtered to only those intersecting the bounding box. + intersects: A string or dictionary representing a GeoJSON geometry, or an object that implements + a __geo_interface__ property, as supported by several libraries including Shapely, ArcPy, PySAL, and + geojson. Results filtered to only those intersecting the geometry. + query: List or JSON of query parameters as per the STAC API query extension. 
+ sortby: A single field or list of fields to sort the response by + date_ranges: list[str]: + max_items: int | None: (Default value = None) + limit: int | None: (Default value = None) + collections: str | list | None: (Default value = None) + bbox: geotools_types.BBoxLike | None: (Default value = None) + intersects: geotools_types.IntersectsLike | None: (Default value = None) + query: dict | None: (Default value = None) + sortby: list | dict | None: (Default value = None) + max_retries: (Default value = 3) + delay: (Default value = 5) + + Returns: """ results = [] if isinstance(collections, str): @@ -409,6 +473,23 @@ def _base_catalog_search( query: dict | None = None, sortby: list | dict | None = None, ): + """ + + Args: + date_range: str: + max_items: int | None: (Default value = None) + limit: int | None: (Default value = None) + ids: list | None: (Default value = None) + collections: str | list | None: (Default value = None) + bbox: geotools_types.BBoxLike | None: (Default value = None) + intersects: geotools_types.IntersectsLike | None: (Default value = None) + query: dict | None: (Default value = None) + sortby: list | dict | None: (Default value = None) + + Returns: + + + """ search = self.catalog.search( datetime=date_range, max_items=max_items, @@ -431,14 +512,7 @@ def _base_catalog_search( return list(items) def sort_results_by_cloud_coverage(self) -> list | None: - """ - Sort results by cloud coverage. - - Returns - ------- - List - List of sorted items. - """ + """Sort results by cloud coverage.""" if self.search_results: self.logger.debug("Sorting results by cloud cover (from least to most)") self.cloud_cover_sorted_results = sorted( @@ -452,13 +526,11 @@ def filter_no_data(self, property_name: str, max_no_data_value: int = 5) -> list """ Filter results and sorted results that are above a nodata value threshold. - Parameters - ---------- - property_name - Name of the property to filter by. For example, with Sentinel 2 data, this - property is named `s2:nodata_pixel_percentage` - max_no_data_value - Maximum nodata value to filter by. 
+ Args: + property_name: str: + max_no_data_value: int: (Default value = 5) + + Returns: """ sorted_results = self.cloud_cover_sorted_results if not sorted_results: @@ -477,17 +549,16 @@ def filter_no_data(self, property_name: str, max_no_data_value: int = 5) -> list def _download_assets(self, item: pystac.Item, bands: list, base_directory: pathlib.Path) -> Asset: """ - Parameters - ---------- - item - Search result item - bands - List of bands to download from asset - base_directory - Base directory where assets will be downloaded + Args: + item: Search result item + bands: List of bands to download from asset + base_directory: Base directory where assets will be downloaded + item: pystac.Item: + bands: list: + base_directory: pathlib.Path: + + Returns: - Returns - ------- """ image_id = item.id @@ -512,6 +583,17 @@ def _download_assets(self, item: pystac.Item, bands: list, base_directory: pathl def _download_results( self, results: list[pystac.Item] | None, bands: list, base_directory: str | pathlib.Path ) -> list[Asset]: + """ + + Args: + results: list[pystac.Item] | None: + bands: list: + base_directory: str | pathlib.Path: + + Returns: + + + """ if not results: return [] downloaded_search_results = [] @@ -529,15 +611,14 @@ def _download_results( def download_search_results(self, bands: list, base_directory: str | pathlib.Path) -> list[Asset]: """ - Parameters - ---------- - bands - List of bands to download from asset - base_directory - Base directory where assets will be downloaded + Args: + bands: List of bands to download from asset + base_directory: Base directory where assets will be downloaded + bands: list: + base_directory: str | pathlib.Path: + + Returns: - Returns - ------- """ downloaded_search_results = self._download_results( @@ -547,6 +628,7 @@ def download_search_results(self, bands: list, base_directory: str | pathlib.Pat return downloaded_search_results def _generate_best_results(self): + """""" results = [] if self.filtered_results: results = self.filtered_results @@ -564,19 +646,16 @@ def download_sorted_by_cloud_cover_search_results( ) -> list[Asset]: """ - Parameters - ---------- - bands - List of bands to download from asset - base_directory - Base directory where assets will be downloaded - first_x_num_of_items - Number of items to download from the results + Args: + bands: List of bands to download from asset + base_directory: Base directory where assets will be downloaded + first_x_num_of_items: Number of items to download from the results + bands: list: + base_directory: str | pathlib.Path: + first_x_num_of_items: int | None: (Default value = None) + + Returns: - Returns - ------- - List - List of Assets """ results = self._generate_best_results() @@ -591,17 +670,14 @@ def download_sorted_by_cloud_cover_search_results( def download_best_cloud_cover_result(self, bands: list, base_directory: str | pathlib.Path) -> Asset | None: """ - Parameters - ---------- - bands - List of bands to download from asset - base_directory - Base directory where assets will be downloaded - - Returns - ------- - Asset - Asset + Args: + bands: List of bands to download from asset + base_directory: Base directory where assets will be downloaded + bands: list: + base_directory: str | pathlib.Path: + + Returns: + """ results = self._generate_best_results() From 835acbbfc5eac6b45fd6f9e5d766748a9bccebef Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 24 Sep 2025 14:08:51 -0400 Subject: [PATCH 03/12] Convert project's docstrings to Google standard --- geospatial_tools/download.py | 41 +- 
geospatial_tools/geotools_types.py | 13 +- .../planetary_computer/sentinel_2.py | 206 ++++++---- geospatial_tools/raster.py | 166 ++++---- geospatial_tools/stac.py | 148 ++++---- geospatial_tools/utils.py | 127 +++---- geospatial_tools/vector.py | 353 +++++++++--------- 7 files changed, 568 insertions(+), 486 deletions(-) diff --git a/geospatial_tools/download.py b/geospatial_tools/download.py index 59e9dce..acd2d91 100644 --- a/geospatial_tools/download.py +++ b/geospatial_tools/download.py @@ -11,6 +11,17 @@ def _download_from_link( target_download: str, output_name: str = None, output_directory: str | Path = DATA_DIR ) -> list[str | Path]: + """ + + Args: + target_download: str: + output_name: str: (Default value = None) + output_directory: str | Path: (Default value = DATA_DIR) + + Returns: + + + """ file_configs = get_yaml_config("data_file_links") key = target_download url = file_configs[key]["url"] @@ -28,16 +39,13 @@ def download_usa_polygon(output_name: str = USA_POLYGON, output_directory: str | """ Download USA polygon file. - Parameters - ---------- - output_name - What name to give to downloaded file - output_directory - Where to save the downloaded file + Args: + output_name: What name to give to downloaded file + output_directory: Where to save the downloaded file + output_name: str: (Default value = USA_POLYGON) + output_directory: str | Path: (Default value = DATA_DIR) - Returns - ------- - List of output path to downloaded file + Returns: """ file_list = _download_from_link( target_download=USA_POLYGON, output_name=output_name, output_directory=output_directory @@ -51,16 +59,13 @@ def download_s2_tiling_grid( """ " Download Sentinel 2 tiling grid file. - Parameters - ---------- - output_name - What name to give to downloaded file - output_directory - Where to save the downloaded file + Args: + output_name: What name to give to downloaded file + output_directory: Where to save the downloaded file + output_name: str: (Default value = SENTINEL_2_TILLING_GRID) + output_directory: str | Path: (Default value = DATA_DIR) - Returns - ------- - List of output path to downloaded file + Returns: """ file_list = _download_from_link( target_download=SENTINEL_2_TILLING_GRID, output_name=output_name, output_directory=output_directory diff --git a/geospatial_tools/geotools_types.py b/geospatial_tools/geotools_types.py index b13de62..4039ec4 100644 --- a/geospatial_tools/geotools_types.py +++ b/geospatial_tools/geotools_types.py @@ -1,5 +1,8 @@ """This module contains constants and functions pertaining to data types.""" +from datetime import datetime +from typing import Iterator, Union + from shapely.geometry import ( GeometryCollection, LineString, @@ -11,4 +14,12 @@ ) BBoxLike = tuple[float, float, float, float] -IntersectsLike = Point | Polygon | LineString | MultiPolygon | MultiPoint | MultiLineString | GeometryCollection +IntersectsLike = Union[Point, Polygon, LineString, MultiPolygon, MultiPoint, MultiLineString, GeometryCollection] +DateLike = Union[ + datetime, + str, + None, + tuple[Union[datetime, str, None], Union[datetime, str, None]], + list[Union[datetime, str, None]], + Iterator[Union[datetime, str, None]], +] diff --git a/geospatial_tools/planetary_computer/sentinel_2.py b/geospatial_tools/planetary_computer/sentinel_2.py index 9fef5f7..1e60c61 100644 --- a/geospatial_tools/planetary_computer/sentinel_2.py +++ b/geospatial_tools/planetary_computer/sentinel_2.py @@ -41,27 +41,19 @@ def __init__( ): """ - Parameters - ---------- - sentinel2_tiling_grid - GeoDataFrame 
containing Sentinel 2 tiling grid - sentinel2_tiling_grid_column - Name of the column in `sentinel2_tiling_grid` that contains the tile names - (ex tile name: 10SDJ) - vector_features - GeoDataFrame containing the vector features for which the best Sentinel 2 - products will be chosen for. - vector_features_column - Name of the column in `vector_features` where the best Sentinel 2 products - will be written to - date_ranges - Date range used to search for Sentinel 2 products. should be created using - `geospatial_tools.utils.create_date_range_for_specific_period` separately, - or `BestProductsForFeatures.create_date_range` after initialization. - max_cloud_cover - Maximum cloud cover used to search for Sentinel 2 products. - logger - Logger instance + Args: + sentinel2_tiling_grid: GeoDataFrame containing Sentinel 2 tiling grid + sentinel2_tiling_grid_column: Name of the column in `sentinel2_tiling_grid` that contains the tile names + (ex tile name: 10SDJ) + vector_features: GeoDataFrame containing the vector features for which the best Sentinel 2 + products will be chosen for. + vector_features_column: Name of the column in `vector_features` where the best Sentinel 2 products + will be written to + date_ranges: Date range used to search for Sentinel 2 products. should be created using + `geospatial_tools.utils.create_date_range_for_specific_period` separately, + or `BestProductsForFeatures.create_date_range` after initialization. + max_cloud_cover: Maximum cloud cover used to search for Sentinel 2 products. + logger: Logger instance """ self.logger = logger self.sentinel2_tiling_grid = sentinel2_tiling_grid @@ -80,18 +72,38 @@ def __init__( @property def max_cloud_cover(self): + """""" return self._max_cloud_cover @max_cloud_cover.setter def max_cloud_cover(self, max_cloud_cover: int): + """ + + Args: + max_cloud_cover: int: + + Returns: + + + """ self._max_cloud_cover = max_cloud_cover @property def date_ranges(self): + """""" return self._date_ranges @date_ranges.setter def date_ranges(self, date_range: list[str]): + """ + + Args: + date_range: list[str]: + + Returns: + + + """ self._date_ranges = date_range def create_date_ranges(self, start_year: int, end_year: int, start_month: int, end_month: int) -> list[str]: @@ -106,20 +118,17 @@ def create_date_ranges(self, start_year: int, end_year: int, start_month: int, e For example, I want to create date ranges for 2020 and 2022, but only for the months from November to January. I therefore expect to have 2 ranges: [2020-11-01 to 2021-01-31, 2021-11-01 to 2022-01-31]. 
- Parameters - ---------- - start_year - Start year for ranges - end_year - End year for ranges - start_month - Starting month for each period - end_month - End month for each period (inclusively) - - Returns - ------- - List containing datetime date ranges + Args: + start_year: Start year for ranges + end_year: End year for ranges + start_month: Starting month for each period + end_month: End month for each period (inclusively) + start_year: int: + end_year: int: + start_month: int: + end_month: int: + + Returns: """ self.date_ranges = create_date_range_for_specific_period( start_year=start_year, end_year=end_year, start_month_range=start_month, end_month_range=end_month @@ -134,10 +143,11 @@ def find_best_complete_products(self, max_cloud_cover: int | None = None, max_no Filtered out tiles will be stored in `self.incomplete` and tiles for which the search has found no results will be stored in `self.error_list` - Returns - ------- - tile_dict: - Tile dictionary containing the successful search results. + Args: + max_cloud_cover: int | None: (Default value = None) + max_no_data_value: int: (Default value = 5) + + Returns: """ cloud_cover = self.max_cloud_cover if max_cloud_cover: @@ -171,12 +181,7 @@ def find_best_complete_products(self, max_cloud_cover: int | None = None, max_no return self.successful_results def select_best_products_per_feature(self) -> GeoDataFrame: - """ - - Returns - ------- - - """ + """""" spatial_join_results = spatial_join_within( polygon_features=self.sentinel2_tiling_grid, polygon_column=self.sentinel2_tiling_grid_column, @@ -193,6 +198,15 @@ def select_best_products_per_feature(self) -> GeoDataFrame: return self.vector_features_with_products def to_file(self, output_dir: str | pathlib.Path) -> None: + """ + + Args: + output_dir: str | pathlib.Path: + + Returns: + + + """ write_results_to_file( cloud_cover=self.max_cloud_cover, successful_results=self.successful_results, @@ -208,6 +222,18 @@ def sentinel_2_complete_tile_search( max_cloud_cover: int, max_no_data_value: int = 5, ) -> tuple[int, str, float | None, float | None] | None: + """ + + Args: + tile_id: int: + date_ranges: list[str]: + max_cloud_cover: int: + max_no_data_value: int: (Default value = 5) + + Returns: + + + """ client = StacSearch(PLANETARY_COMPUTER) collection = "sentinel-2-l2a" tile_ids = [tile_id] @@ -247,6 +273,19 @@ def find_best_product_per_s2_tile( max_no_data_value: int = 5, num_of_workers: int = 4, ): + """ + + Args: + date_ranges: list[str]: + max_cloud_cover: int: + s2_tile_grid_list: list: + max_no_data_value: int: (Default value = 5) + num_of_workers: int: (Default value = 4) + + Returns: + + + """ successful_results = {} for tile in s2_tile_grid_list: successful_results[tile] = "" @@ -280,6 +319,17 @@ def find_best_product_per_s2_tile( def _get_best_product_id_for_each_grid_tile( s2_tile_search_results: dict, feature_s2_tiles: GeoDataFrame, logger: logging.Logger = LOGGER ) -> str | None: + """ + + Args: + s2_tile_search_results: dict: + feature_s2_tiles: GeoDataFrame: + logger: logging.Logger: (Default value = LOGGER) + + Returns: + + + """ search_result_keys = s2_tile_search_results.keys() all_keys_present = all(item in search_result_keys for item in feature_s2_tiles) if not all_keys_present: @@ -311,6 +361,19 @@ def write_best_product_ids_to_dataframe( s2_tiles_column: str = "s2_tiles", logger: logging.Logger = LOGGER, ): + """ + + Args: + spatial_join_results: GeoDataFrame: + tile_dictionary: dict: + best_product_column: str: (Default value = "best_s2_product_id") + 
s2_tiles_column: str: (Default value = "s2_tiles") + logger: logging.Logger: (Default value = LOGGER) + + Returns: + + + """ logger.info("Writing best product IDs to dataframe") spatial_join_results[best_product_column] = spatial_join_results[s2_tiles_column].apply( lambda x: _get_best_product_id_for_each_grid_tile(s2_tile_search_results=tile_dictionary, feature_s2_tiles=x) @@ -325,6 +388,20 @@ def write_results_to_file( output_dir: str | pathlib.Path = DATA_DIR, logger: logging.Logger = LOGGER, ) -> dict: + """ + + Args: + cloud_cover: int: + successful_results: dict: + incomplete_results: list | None: (Default value = None) + error_results: list | None: (Default value = None) + output_dir: str | pathlib.Path: (Default value = DATA_DIR) + logger: logging.Logger: (Default value = LOGGER) + + Returns: + + + """ tile_filename = output_dir / f"data_lt{cloud_cover}cc.json" with open(tile_filename, "w", encoding="utf-8") as json_file: json.dump(successful_results, json_file, indent=4) @@ -368,28 +445,25 @@ def download_and_process_sentinel2_asset( It will download the individual asset bands provided in the `bands` argument, merge then all in a single tif and then reproject them to the input CRS. - Parameters - ---------- - product_id - ID of the Sentinel 2 product to be downloaded - product_bands - List of the product bands to be downloaded - collections - Collections to be downloaded from. Defaults to `sentinel-2-l2a` - target_projection - The CRS project for the end product. If `None`, the reprojection step will be + Args: + product_id: ID of the Sentinel 2 product to be downloaded + product_bands: List of the product bands to be downloaded + collections: Collections to be downloaded from. Defaults to `sentinel-2-l2a` + target_projection: The CRS project for the end product. If `None`, the reprojection step will be skipped - stac_client - StacSearch client to used. A new one will be created if not provided - base_directory - The base directory path where the downloaded files will be stored - delete_intermediate_files - Flag to determine if intermediate files should be deleted. Defaults to False - logger - Logger instance - - Returns - ------- + stac_client: StacSearch client to used. A new one will be created if not provided + base_directory: The base directory path where the downloaded files will be stored + delete_intermediate_files: Flag to determine if intermediate files should be deleted. Defaults to False + logger: Logger instance + product_id: str: + product_bands: list[str]: + collections: str: (Default value = "sentinel-2-l2a") + target_projection: int | str | None: (Default value = None) + base_directory: str | pathlib.Path: (Default value = DATA_DIR) + delete_intermediate_files: bool: (Default value = False) + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ base_file_name = f"{base_directory}/{product_id}" merged_file = f"{base_file_name}_merged.tif" diff --git a/geospatial_tools/raster.py b/geospatial_tools/raster.py index 9f4cd12..6f2e103 100644 --- a/geospatial_tools/raster.py +++ b/geospatial_tools/raster.py @@ -27,18 +27,18 @@ def reproject_raster( ) -> pathlib.Path | None: """ - Parameters - ---------- - dataset_path - Path to the dataset to be reprojected. - target_crs - EPSG code in string or int format. Can be given in the following ways: 5070 | "5070" | "EPSG:5070" - target_path - Path and filename for reprojected dataset. - logger - - Returns - ------- + Args: + dataset_path: Path to the dataset to be reprojected. 
+ target_crs: EPSG code in string or int format. Can be given in the following ways: 5070 | "5070" | "EPSG:5070" + target_path: Path and filename for reprojected dataset. + logger: + dataset_path: str | pathlib.Path: + target_crs: str | int: + target_path: str | pathlib.Path: + logger: logging.Logger: (Default value = LOGGER) + + Returns: + """ if isinstance(dataset_path, str): @@ -83,21 +83,20 @@ def _clip_process( ) -> tuple[int, GeoDataFrame, pathlib.Path] | None: """ - Parameters - ---------- - raster_image - Path to raster image to be clipped. - id_polygon - Tuple containing an id number and a polygon (row from a Geodataframe). - base_output_filename - Base filename for outputs. If `None`, will be taken from input polygon layer. - output_dir - Directory path where output will be written. - - Returns - ------- - Tuple - Tuple containing an id number and a polygon in Geodataframe format. + Args: + raster_image: Path to raster image to be clipped. + id_polygon: Tuple containing an id number and a polygon (row from a Geodataframe). + base_output_filename: Base filename for outputs. If `None`, will be taken from input polygon layer. + output_dir: Directory path where output will be written. + raster_image: pathlib.Path | str: + id_polygon: tuple[int: + GeoDataFrame]: + base_output_filename: str | None: + output_dir: pathlib.Path | str: + logger: logging.Logger: (Default value = LOGGER) + + Returns: + """ polygon_id, polygon = id_polygon @@ -146,25 +145,22 @@ def clip_raster_with_polygon( ) -> list[pathlib.Path]: """ - Parameters - ---------- - raster_image - Path to raster image to be clipped. - polygon_layer - Polygon layer which polygons will be used to clip the raster image. - base_output_filename - Base filename for outputs. If `None`, will be taken from input polygon layer. - output_dir - Directory path where output will be written. - num_of_workers - The number of processes to use for parallel execution. Defaults to `cpu_count()`. - logger - Logger instance - - Returns - ------- - List - List of clipped rasters. + Args: + raster_image: Path to raster image to be clipped. + polygon_layer: Polygon layer which polygons will be used to clip the raster image. + base_output_filename: Base filename for outputs. If `None`, will be taken from input polygon layer. + output_dir: Directory path where output will be written. + num_of_workers: The number of processes to use for parallel execution. Defaults to `cpu_count()`. + logger: Logger instance + raster_image: pathlib.Path | str: + polygon_layer: pathlib.Path | str | GeoDataFrame: + base_output_filename: str | None: (Default value = None) + output_dir: str | pathlib.Path: (Default value = DATA_DIR) + num_of_workers: int | None: (Default value = None) + logger: logging.Logger: (Default value = LOGGER) + + Returns: + """ workers = cpu_count() @@ -221,17 +217,14 @@ def clip_raster_with_polygon( def get_total_band_count(raster_file_list: list[pathlib.Path | str], logger: logging.Logger = LOGGER) -> int: """ - Parameters - ---------- - raster_file_list - List of raster files to be processed. - logger - Logger instance + Args: + raster_file_list: List of raster files to be processed. + logger: Logger instance + raster_file_list: list[pathlib.Path | str]: + logger: logging.Logger: (Default value = LOGGER) + + Returns: - Returns - ------- - int - Total number of bands . 
""" total_band_count = 0 @@ -247,13 +240,14 @@ def create_merged_raster_bands_metadata( ) -> dict: """ - Parameters - ---------- - raster_file_list - logger + Args: + raster_file_list: + logger: + raster_file_list: list[pathlib.Path | str]: + logger: logging.Logger: (Default value = LOGGER) + + Returns: - Returns - ------- """ logger.info("Creating merged asset metadata") @@ -280,24 +274,22 @@ def merge_raster_bands( While it can probably be used to create spatial time series, and not just combine bands from a single image product, it has not yet been tested for that specific purpose. - Parameters - ---------- - raster_file_list - List of raster files to be processed. - merged_filename - Name of output raster file. - merged_metadata - Dictionary of metadata to use if you prefer to great it independently. - merged_band_names - Names of final output raster bands. For example : I have 3 images representing each - a single band; raster_file_list = ["image01_B0.tif", "image01_B1.tif", "image01_B2.tif"]. - With, merged_band_names, individual band id can be assigned for the final output raster; - ["B0", "B1", "B2"]. - logger - Logger instance - - Returns - ------- + Args: + raster_file_list: List of raster files to be processed. + merged_filename: Name of output raster file. + merged_metadata: Dictionary of metadata to use if you prefer to great it independently. + merged_band_names: Names of final output raster bands. For example : I have 3 images representing each + a single band; raster_file_list = ["image01_B0.tif", "image01_B1.tif", "image01_B2.tif"]. + With, merged_band_names, individual band id can be assigned for the final output raster; + ["B0", "B1", "B2"]. + logger: Logger instance + raster_file_list: list[pathlib.Path | str]: + merged_filename: pathlib.Path | str: + merged_band_names: list[str]: (Default value = None) + merged_metadata: dict: (Default value = None) + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ if not merged_metadata: merged_metadata = create_merged_raster_bands_metadata(raster_file_list) @@ -348,6 +340,20 @@ def _handle_band_metadata( merged_band_names: list[str], merged_image_index: int, ): + """ + + Args: + source_image: rasterio.io.DatasetReader: + source_image_band_index: int: + band_names_index: int: + merged_asset_image: rasterio.io.DatasetWriter: + merged_band_names: list[str]: + merged_image_index: int: + + Returns: + + + """ source_description_index = source_image_band_index - 1 description = source_image.descriptions[source_description_index] num_of_bands = source_image.count diff --git a/geospatial_tools/stac.py b/geospatial_tools/stac.py index a9cb7bb..174b407 100644 --- a/geospatial_tools/stac.py +++ b/geospatial_tools/stac.py @@ -1,8 +1,8 @@ """This module contains functions that are related to STAC API.""" import logging -import pathlib import time +from pathlib import Path import pystac import pystac_client @@ -10,6 +10,7 @@ from pystac_client.exceptions import APIError from geospatial_tools import geotools_types +from geospatial_tools.geotools_types import DateLike from geospatial_tools.raster import ( create_merged_raster_bands_metadata, get_total_band_count, @@ -29,7 +30,7 @@ PLANETARY_COMPUTER_API = "https://planetarycomputer.microsoft.com/api/stac/v1" -def create_planetary_computer_catalog(max_retries=3, delay=5, logger=LOGGER) -> pystac_client.Client | None: +def create_planetary_computer_catalog(max_retries: int = 3, delay=5, logger=LOGGER) -> pystac_client.Client | None: """ Creates a Planetary Computer Catalog Client. 
@@ -92,13 +93,21 @@ def list_available_catalogs(logger: logging.Logger = LOGGER) -> frozenset[str]: class AssetSubItem: """""" - def __init__(self, asset, item_id: str, band: str, filename: str | pathlib.Path): + def __init__(self, asset, item_id: str, band: str, filename: str | Path): + """ + + Args: + asset: + item_id: + band: + filename: + """ if isinstance(filename, str): - filename = pathlib.Path(filename) + filename = Path(filename) self.asset = asset self.item_id: str = item_id self.band: str = band - self.filename: pathlib.Path = filename + self.filename: Path = filename class Asset: @@ -109,10 +118,20 @@ def __init__( asset_id: str, bands: list[str] | None = None, asset_item_list: list[AssetSubItem] | None = None, - merged_asset_path: str | pathlib.Path | None = None, - reprojected_asset: str | pathlib.Path | None = None, + merged_asset_path: str | Path | None = None, + reprojected_asset: str | Path | None = None, logger: logging.Logger = LOGGER, ): + """ + + Args: + asset_id: + bands: + asset_item_list: + merged_asset_path: + reprojected_asset: + logger: + """ self.asset_id = asset_id self.bands = bands self.list = asset_item_list @@ -143,13 +162,11 @@ def show_asset_items(self): ) self.logger.info(f"Asset list for asset [{self.asset_id}] : \n\t{asset_list}") - def merge_asset( - self, base_directory: str | pathlib.Path | None = None, delete_sub_items: bool = False - ) -> pathlib.Path | None: + def merge_asset(self, base_directory: str | Path | None = None, delete_sub_items: bool = False) -> Path | None: """ Args: - base_directory: str | pathlib.Path | None: (Default value = None) + base_directory: str | Path | None: (Default value = None) delete_sub_items: bool: (Default value = False) Returns: @@ -159,7 +176,7 @@ def merge_asset( if not base_directory: base_directory = "" if isinstance(base_directory, str): - base_directory = pathlib.Path(base_directory) + base_directory = Path(base_directory) merged_filename = base_directory / f"{self.asset_id}_merged.tif" @@ -187,14 +204,14 @@ def merge_asset( def reproject_merged_asset( self, target_projection: str | int, - base_directory: str | pathlib.Path = None, + base_directory: str | Path = None, delete_merged_asset: bool = False, ): """ Args: target_projection: str | int: - base_directory: str | pathlib.Path: (Default value = None) + base_directory: str | Path: (Default value = None) delete_merged_asset: bool: (Default value = False) Returns: @@ -204,7 +221,7 @@ def reproject_merged_asset( if not base_directory: base_directory = "" if isinstance(base_directory, str): - base_directory = pathlib.Path(base_directory) + base_directory = Path(base_directory) target_path = base_directory / f"{self.asset_id}_reprojected.tif" self.logger.info(f"Reprojecting asset [{self.asset_id}] ...") reprojected_filename = reproject_raster( @@ -258,6 +275,12 @@ class StacSearch: """Utility class to help facilitate and automate STAC API searches through the use of `pystac_client.Client`.""" def __init__(self, catalog_name, logger=LOGGER): + """ + + Args: + catalog_name: + logger: + """ self.catalog: pystac_client.Client = catalog_generator(catalog_name=catalog_name) self.search_results: list[pystac.Item] | None = None self.cloud_cover_sorted_results: list[pystac.Item] | None = None @@ -269,7 +292,7 @@ def __init__(self, catalog_name, logger=LOGGER): def search( self, - date_range=None, + date_range: DateLike = None, max_items: int | None = None, limit: int | None = None, ids: list | None = None, @@ -278,7 +301,7 @@ def search( intersects: 
geotools_types.IntersectsLike | None = None, query: dict | None = None, sortby: list | dict | None = None, - max_retries=3, + max_retries: int = 3, delay=5, ) -> list: """ @@ -287,43 +310,42 @@ def search( Parameter descriptions taken from pystac docs. Args: - date_range: Either a single datetime or datetime range used to filter results. You may express a single datetime - using a datetime. datetime instance, a RFC 3339-compliant timestamp, or a simple date string (see below). - Timezone unaware instances are assumed to represent UTC timestamps. - You may represent a datetime range using a "/" separated string as described - in the spec, or a list, tuple, or iterator of 2 timestamps or datetime instances. For open-ended ranges, - use either ".." ('2020-01-01:00:00:00Z/..', ['2020-01-01:00:00:00Z', '..']) or a value of None - (['2020-01-01:00:00:00Z', None]). If using a simple date string, the datetime can be specified in - YYYY-mm-dd format, optionally truncating to YYYY-mm or just YYYY. - Simple date strings will be expanded to include the entire time period, for example: - - * 2017 expands to - 2017-01-01T00:00:00Z/ 2017-12-31T23:59:59Z - * 2017-06 expands to - 2017-06-01T00:00:00Z/ 2017-06-30T23:59:59Z - * 2017-06-10 expands to - 2017-06-10T00:00:00Z/ 2017-06-10T23:59:59Z - If used in a range, the end of the range expands to the end of that day/ month/ year, for example: - - * 2017/ 2018 expands to - 2017-01-01T00:00:00Z/ 2018-12-31T23:59:59Z - * 2017-06/ 2017-07 expands to - 2017-06-01T00:00:00Z/ 2017-07-31T23:59:59Z - * 2017-06-10/ 2017-06-11 expands to - 2017-06-10T00:00:00Z/ 2017-06-11T23:59:59Z - (Default value = None) + date_range: Either a single datetime or datetime range used to filter results. + You may express a single datetime using a :class:`datetime.datetime` + instance, a `RFC 3339-compliant `__ + timestamp, or a simple date string (see below). Instances of + :class:`datetime.datetime` may be either + timezone aware or unaware. Timezone aware instances will be converted to + a UTC timestamp before being passed + to the endpoint. Timezone unaware instances are assumed to represent UTC + timestamps. You may represent a + datetime range using a ``"/"`` separated string as described in the + spec, or a list, tuple, or iterator + of 2 timestamps or datetime instances. For open-ended ranges, use either + ``".."`` (``'2020-01-01:00:00:00Z/..'``, + ``['2020-01-01:00:00:00Z', '..']``) or a value of ``None`` + (``['2020-01-01:00:00:00Z', None]``). + If using a simple date string, the datetime can be specified in + ``YYYY-mm-dd`` format, optionally truncating + to ``YYYY-mm`` or just ``YYYY``. Simple date strings will be expanded to + include the entire time period, for example: ``2017`` expands to + ``2017-01-01T00:00:00Z/2017-12-31T23:59:59Z`` and ``2017-06`` expands + to ``2017-06-01T00:00:00Z/2017-06-30T23:59:59Z`` + If used in a range, the end of the range expands to the end of that + day/month/year, for example: ``2017-06-10/2017-06-11`` expands to + ``2017-06-10T00:00:00Z/2017-06-11T23:59:59Z`` (Default value = None) max_items: The maximum number of items to return from the search, even if there are more matching results. limit: A recommendation to the service as to the number of items to return per page of results. ids: List of one or more Item ids to filter on. - collections: List of one or more Collection IDs or pystac. Collection instances. Only Items in one of the provided - Collections will be searched + collections: List of one or more Collection IDs or pystac. 
Collection instances. Only Items in one of the + provided Collections will be searched bbox: A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be filtered to only those intersecting the bounding box. - intersects: A string or dictionary representing a GeoJSON geometry, or an object that implements a __geo_interface__ - property, as supported by several libraries including Shapely, ArcPy, PySAL, and geojson. Results - filtered to only those intersecting the geometry. + intersects: A string or dictionary representing a GeoJSON geometry, or an object that implements a + __geo_interface__ property, as supported by several libraries including Shapely, ArcPy, PySAL, and geojson. + Results filtered to only those intersecting the geometry. query: List or JSON of query parameters as per the STAC API query extension. sortby: A single field or list of fields to sort the response by max_items: int | None: (Default value = None) @@ -377,7 +399,7 @@ def search( def search_for_date_ranges( self, - date_ranges: list[str], + date_ranges: list[DateLike], max_items: int | None = None, limit: int | None = None, collections: str | list | None = None, @@ -385,7 +407,7 @@ def search_for_date_ranges( intersects: geotools_types.IntersectsLike | None = None, query: dict | None = None, sortby: list | dict | None = None, - max_retries=3, + max_retries: int = 3, delay=5, ) -> list: """ @@ -463,7 +485,7 @@ def search_for_date_ranges( def _base_catalog_search( self, - date_range: str, + date_range: DateLike, max_items: int | None = None, limit: int | None = None, ids: list | None = None, @@ -476,7 +498,7 @@ def _base_catalog_search( """ Args: - date_range: str: + date_range: max_items: int | None: (Default value = None) limit: int | None: (Default value = None) ids: list | None: (Default value = None) @@ -546,7 +568,7 @@ def filter_no_data(self, property_name: str, max_no_data_value: int = 5) -> list return filtered_results - def _download_assets(self, item: pystac.Item, bands: list, base_directory: pathlib.Path) -> Asset: + def _download_assets(self, item: pystac.Item, bands: list, base_directory: Path) -> Asset: """ Args: @@ -555,7 +577,7 @@ def _download_assets(self, item: pystac.Item, bands: list, base_directory: pathl base_directory: Base directory where assets will be downloaded item: pystac.Item: bands: list: - base_directory: pathlib.Path: + base_directory: Path: Returns: @@ -581,14 +603,14 @@ def _download_assets(self, item: pystac.Item, bands: list, base_directory: pathl return downloaded_files def _download_results( - self, results: list[pystac.Item] | None, bands: list, base_directory: str | pathlib.Path + self, results: list[pystac.Item] | None, bands: list, base_directory: str | Path ) -> list[Asset]: """ Args: results: list[pystac.Item] | None: bands: list: - base_directory: str | pathlib.Path: + base_directory: str | Path: Returns: @@ -597,8 +619,8 @@ def _download_results( if not results: return [] downloaded_search_results = [] - if not isinstance(base_directory, pathlib.Path): - base_directory = pathlib.Path(base_directory) + if not isinstance(base_directory, Path): + base_directory = Path(base_directory) if not base_directory.exists(): base_directory.mkdir(parents=True, exist_ok=True) @@ -608,14 +630,14 @@ def _download_results( downloaded_search_results.append(downloaded_item) return downloaded_search_results - def download_search_results(self, bands: list, base_directory: str | pathlib.Path) -> list[Asset]: + def download_search_results(self, bands: list, 
base_directory: str | Path) -> list[Asset]: """ Args: bands: List of bands to download from asset base_directory: Base directory where assets will be downloaded bands: list: - base_directory: str | pathlib.Path: + base_directory: str | Path: Returns: @@ -642,7 +664,7 @@ def _generate_best_results(self): return results def download_sorted_by_cloud_cover_search_results( - self, bands: list, base_directory: str | pathlib.Path, first_x_num_of_items: int | None = None + self, bands: list, base_directory: str | Path, first_x_num_of_items: int | None = None ) -> list[Asset]: """ @@ -651,7 +673,7 @@ def download_sorted_by_cloud_cover_search_results( base_directory: Base directory where assets will be downloaded first_x_num_of_items: Number of items to download from the results bands: list: - base_directory: str | pathlib.Path: + base_directory: str | Path: first_x_num_of_items: int | None: (Default value = None) Returns: @@ -667,14 +689,14 @@ def download_sorted_by_cloud_cover_search_results( self.downloaded_cloud_cover_sorted_assets = downloaded_search_results return downloaded_search_results - def download_best_cloud_cover_result(self, bands: list, base_directory: str | pathlib.Path) -> Asset | None: + def download_best_cloud_cover_result(self, bands: list, base_directory: str | Path) -> Asset | None: """ Args: bands: List of bands to download from asset base_directory: Base directory where assets will be downloaded bands: list: - base_directory: str | pathlib.Path: + base_directory: str | Path: Returns: diff --git a/geospatial_tools/utils.py b/geospatial_tools/utils.py index a3666db..e71dd88 100644 --- a/geospatial_tools/utils.py +++ b/geospatial_tools/utils.py @@ -22,14 +22,11 @@ def create_logger(logger_name: str) -> logging.Logger: """ Creates a logger object using input name parameter that outputs to stdout. - Parameters - ---------- - logger_name - Name of logger - - Returns - ------- - Created logger object + Args: + logger_name: Name of logger + logger_name: str: + + Returns: """ logging_level = logging.INFO app_config_path = CONFIGS / "geospatial_tools_ini.yaml" @@ -64,18 +61,14 @@ def get_yaml_config(yaml_config_file: str, logger: logging.Logger = LOGGER) -> d Ex. For a file named app_config.yml (or app_config.yaml), directly in the config/ folder, the function could be called like so : `params = get_yaml_config('app_config')` - Parameters - ---------- - yaml_config_file - Path to yaml config file. If config file is in the config folder, + Args: + yaml_config_file: Path to yaml config file. If config file is in the config folder, you can use the file's name without the extension. - logger - Logger to handle messaging, by default LOGGER + logger: Logger to handle messaging, by default LOGGER + yaml_config_file: str: + logger: logging.Logger: (Default value = LOGGER) - Returns - ------- - dict - Dictionary of YAML configuration values + Returns: """ potential_paths = [ @@ -114,16 +107,12 @@ def get_json_config(json_config_file: str, logger=LOGGER) -> dict: Ex. For a file named app_config.json, directly in the config/ folder, the function could be called like so : `params = get_json_config('app_config')` - Parameters - ---------- - json_config_file - Path to JSON config file. If config file is in the config folder, - logger - Logger to handle messaging, by default LOGGER + Args: + json_config_file: Path to JSON config file. 
If config file is in the config folder, + logger: Logger to handle messaging, by default LOGGER + json_config_file: str: - Returns - ------- - Dictionary of JSON configuration values + Returns: """ potential_paths = [ @@ -155,16 +144,13 @@ def get_json_config(json_config_file: str, logger=LOGGER) -> dict: def create_crs(dataset_crs: str | int, logger=LOGGER): """ - Parameters - ---------- - dataset_crs - EPSG code in string or int format. Can be given in the following ways: 5070 | "5070" | "EPSG:5070" - logger: - Logger instance + Args: + dataset_crs: EPSG code in string or int format. Can be given in the following ways: 5070 | "5070" | "EPSG:5070" + logger: Logger instance (Default value = LOGGER) + dataset_crs: str | int: + + Returns: - Returns - ------- - EPSG code in string format : EPSG: """ logger.info(f"Creating EPSG code from following input : [{dataset_crs}]") @@ -189,19 +175,16 @@ def download_url(url: str, filename: str | Path, overwrite: bool = False, logger """ This function downloads a file from a given URL. - Parameters - ---------- - url - Url to download - filename - Filename (or full path) to save the downloaded file - overwrite - If True, overwrite existing file - logger - Logger instance - - Returns - ------- + Args: + url: Url to download + filename: Filename (or full path) to save the downloaded file + overwrite: If True, overwrite existing file + logger: Logger instance (Default value = LOGGER) + url: str: + filename: str | Path: + overwrite: bool: (Default value = False) + + Returns: """ if isinstance(filename, str): filename = Path(filename) @@ -225,18 +208,15 @@ def unzip_file(zip_path: str | Path, extract_to: str | Path, logger: logging.Log """ This function unzips an archive to a specific directory. - Parameters - ---------- - zip_path - Path to zip file - extract_to - Path of directory to extract the zip file - logger - Logger instance - - Returns - ------- - List of unzipped paths + Args: + zip_path: Path to zip file + extract_to: Path of directory to extract the zip file + logger: Logger instance + zip_path: str | Path: + extract_to: str | Path: + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ if isinstance(zip_path, str): zip_path = Path(zip_path) @@ -266,20 +246,17 @@ def create_date_range_for_specific_period( For example, I want to create date ranges for 2020 and 2022, but only for the months from November to January. I therefore expect to have 2 ranges: [2020-11-01 to 2021-01-31, 2021-11-01 to 2022-01-31]. - Parameters - ---------- - start_year - Start year for ranges - end_year - End year for ranges - start_month_range - Starting month for each period - end_month_range - End month for each period (inclusively) - - Returns - ------- - List containing datetime date ranges + Args: + start_year: Start year for ranges + end_year: End year for ranges + start_month_range: Starting month for each period + end_month_range: End month for each period (inclusively) + start_year: int: + end_year: int: + start_month_range: int: + end_month_range: int: + + Returns: """ date_ranges = [] year_bump = 0 diff --git a/geospatial_tools/vector.py b/geospatial_tools/vector.py index e6852c2..2c0475f 100644 --- a/geospatial_tools/vector.py +++ b/geospatial_tools/vector.py @@ -26,19 +26,16 @@ def create_grid_coordinates( """ Create grid coordinates based on input bounding box and grid size. - Parameters - ----------- - bounding_box - The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat). 
+ Args: + bounding_box: The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat). Unit needs to be based on projection used (meters, degrees, etc.). - grid_size - Cell size for grid. Unit needs to be based on projection used (meters, degrees, etc.). - logger - Logger instance. - - Returns - -------- - Tuple containing the longitude and latitude grid coordinates. + grid_size: Cell size for grid. Unit needs to be based on projection used (meters, degrees, etc.). + logger: Logger instance. + bounding_box: list | tuple: + grid_size: float: + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ logger.info(f"Creating grid coordinates for bounding box [{bounding_box}]") min_lon, min_lat, max_lon, max_lat = bounding_box @@ -53,18 +50,15 @@ def generate_flattened_grid_coords( """ Takes in previously created grid coordinates and flattens them. - Parameters - ----------- - lon_coords - Longitude grid coordinates - lat_coords - Latitude grid coordinates - logger - Logger instance. - - Returns - -------- - Flattened longitude and latitude grids. + Args: + lon_coords: Longitude grid coordinates + lat_coords: Latitude grid coordinates + logger: Logger instance. + lon_coords: ndarray: + lat_coords: ndarray: + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ logger.info("Creating flattened grid coordinates") @@ -78,14 +72,13 @@ def _create_polygons_from_coords_chunk(chunk: tuple[ndarray, ndarray, float]) -> """ Helper function to create polygons from input coordinates chunk. - Parameters - ----------- - chunk - Coordinates chunk as a tuple (longitude coords, latitude coords, grid size). + Args: + chunk: Coordinates chunk as a tuple (longitude coords, latitude coords, grid size). + chunk: tuple[ndarray: + ndarray: + float]: - Returns - -------- - List of polygons. + Returns: """ lon_coords, lat_coords, grid_size = chunk polygons = [] @@ -103,20 +96,17 @@ def create_vector_grid( Create a grid of polygons within the specified bounds and cell size. This function uses NumPy vectorized arrays for optimized performance. - Parameters - ----------- - bounding_box - The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat). - grid_size - The size of each grid cell in degrees. - crs - CRS code for projection. ex. 'EPSG:4326' - logger - Logger instance. - - Returns - -------- - GeoDataFrame containing the grid polygons. + Args: + bounding_box: The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat). + grid_size: The size of each grid cell in degrees. + crs: CRS code for projection. ex. 'EPSG:4326' + logger: Logger instance. + bounding_box: list | tuple: + grid_size: float: + crs: str: (Default value = None) + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ lon_coords, lat_coords = create_grid_coordinates(bounding_box=bounding_box, grid_size=grid_size, logger=logger) lon_flat_grid, lat_flat_grid = generate_flattened_grid_coords( @@ -151,24 +141,20 @@ def create_vector_grid_parallel( Create a grid of polygons within the specified bounds and cell size. This function uses NumPy for optimized performance and ProcessPoolExecutor for parallel execution. - Parameters - ----------- - bounding_box - The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat). - grid_size - The size of each grid cell in degrees. - crs - Coordinate reference system for the resulting GeoDataFrame. - num_of_workers - The number of processes to use for parallel execution. 
Defaults to the min of number of CPU cores or number - of cells in the grid - logger - Logger instance. - - Returns - -------- - GeoDataFrame: - GeoDataFrame containing the grid polygons. + Args: + bounding_box: The bounding box of the grid as (min_lon, min_lat, max_lon, max_lat). + grid_size: The size of each grid cell in degrees. + crs: Coordinate reference system for the resulting GeoDataFrame. + num_of_workers: The number of processes to use for parallel execution. Defaults to the min of number of CPU cores + or number of cells in the grid + logger: Logger instance. + bounding_box: list | tuple: + grid_size: float: + crs: str | int: (Default value = None) + num_of_workers: int: (Default value = None) + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ lon_coords, lat_coords = create_grid_coordinates(bounding_box=bounding_box, grid_size=grid_size, logger=logger) lon_flat_grid, lat_flat_grid = generate_flattened_grid_coords( @@ -210,6 +196,16 @@ def create_vector_grid_parallel( def _generate_uuid_column(df, column_name="feature_id"): + """ + + Args: + df: + column_name: (Default value = "feature_id") + + Returns: + + + """ df[column_name] = [str(uuid.uuid4()) for _ in range(len(df))] @@ -220,6 +216,19 @@ def dask_spatial_join( predicate: str = "intersects", num_of_workers=4, ) -> GeoDataFrame: + """ + + Args: + select_features_from: GeoDataFrame: + intersected_with: GeoDataFrame: + join_type: str: (Default value = "inner") + predicate: str: (Default value = "intersects") + num_of_workers: (Default value = 4) + + Returns: + + + """ dask_select_gdf = dgpd.from_geopandas(select_features_from, npartitions=num_of_workers) dask_intersected_gdf = dgpd.from_geopandas(intersected_with, npartitions=1) result = dgpd.sjoin(dask_select_gdf, dask_intersected_gdf, how=join_type, predicate=predicate).compute() @@ -239,25 +248,25 @@ def multiprocessor_spatial_join( ) -> GeoDataFrame: """ - Parameters - ---------- - select_features_from - Numpy array containing the polygons from which to select features from. - intersected_with - Geodataframe containing the polygons that will be used to select features with via an intersect operation. - join_type - How the join will be executed. Available join_types are: + Args: + select_features_from: Numpy array containing the polygons from which to select features from. + intersected_with: Geodataframe containing the polygons that will be used to select features with via an + intersect operation. + join_type: How the join will be executed. Available join_types are: ['left', 'right', 'inner']. Defaults to 'inner' - predicate - The predicate to use for selecting features from. Available predicates are: + predicate: The predicate to use for selecting features from. Available predicates are: ['intersects', 'contains', 'within', 'touches', 'crosses', 'overlaps']. Defaults to 'intersects' - num_of_workers - The number of processes to use for parallel execution. Defaults to 4. - logger - Logger instance. + num_of_workers: The number of processes to use for parallel execution. Defaults to 4. + logger: Logger instance. 
+ select_features_from: GeoDataFrame: + intersected_with: GeoDataFrame: + join_type: str: (Default value = "inner") + predicate: str: (Default value = "intersects") + num_of_workers: int: (Default value = 4) + logger: logging.Logger: (Default value = LOGGER) + + Returns: - Returns - ------- """ select_features_from_chunks = np.array_split(select_features_from, num_of_workers) @@ -292,29 +301,26 @@ def select_polygons_by_location( `gpd.sjoin` to allow parallel execution. While it does use `sjoin`, only the columns from `select_features_from` are kept. - Parameters - ---------- - select_features_from - GeoDataFrame containing the polygons from which to select features from. - intersected_with - Geodataframe containing the polygons that will be used to select features with via an intersect operation. - num_of_workers - Number of parallel processes to use for execution. Defaults to the min of number of CPU cores or number - (cpu_count()) - join_type - predicate - The predicate to use for selecting features from. Available predicates are: + Args: + select_features_from: GeoDataFrame containing the polygons from which to select features from. + intersected_with: Geodataframe containing the polygons that will be used to select features with via an intersect + operation. + num_of_workers: Number of parallel processes to use for execution. Defaults to the min of number of CPU cores + or number (cpu_count()) + join_type: + predicate: The predicate to use for selecting features from. Available predicates are: ['intersects', 'contains', 'within', 'touches', 'crosses', 'overlaps']. Defaults to 'intersects' - join_function - Function that will execute the join operation. Available functions are: - 'multiprocessor_spatial_join'; 'dask_spatial_join'; or custom functions. - logger - Logger instance. - - Returns - ------- - GeoDataFrame: - A GeoDataFrame containing the selected polygons. + join_function: Function that will execute the join operation. Available functions are: + 'multiprocessor_spatial_join'; 'dask_spatial_join'; or custom functions. + (Default value = multiprocessor_spatial_join) + logger: Logger instance. + select_features_from: GeoDataFrame: + intersected_with: GeoDataFrame: + num_of_workers: int: (Default value = None) + join_type: str: (Default value = "inner") + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ workers = cpu_count() if num_of_workers: @@ -341,18 +347,14 @@ def to_geopackage(gdf: GeoDataFrame, filename: str | Path, logger=LOGGER) -> str """ Save GeoDataFrame to a Geopackage file. - Parameters - ----------- - gdf - The GeoDataFrame to save. - filename - The filename to save to. - logger - Logger instance - - Returns - -------- - File path of the saved GeoDataFrame. + Args: + gdf: The GeoDataFrame to save. + filename: The filename to save to. + logger: Logger instance (Default value = LOGGER) + gdf: GeoDataFrame: + filename: str | Path: + + Returns: """ start = time.time() logger.info("Starting writing process") @@ -373,20 +375,17 @@ def to_geopackage_chunked( potentially be slower than `to_geopackage`, especially if `chunk_size` is not adequately defined. Therefore, this function should only be required if `to_geopackage` fails because of memory issues. - Parameters - ----------- - gdf - The GeoDataFrame to save. - filename - The filename to save to. - chunk_size - The number of rows per chunk. - logger - Logger instance. - - Returns - -------- - File path of the saved GeoDataFrame. + Args: + gdf: The GeoDataFrame to save. 
+ filename: The filename to save to. + chunk_size: The number of rows per chunk. + logger: Logger instance. + gdf: GeoDataFrame: + filename: str: + chunk_size: int: (Default value = 1000000) + logger: logging.Logger: (Default value = LOGGER) + + Returns: """ filename_path = Path(filename) if filename_path.exists(): @@ -412,18 +411,14 @@ def select_all_within_feature(polygon_feature: gpd.GeoSeries, vector_features: g """ This function is quite small and simple, but exists mostly as a. - Parameters - ---------- - polygon_feature - Polygon feature that will be used to find which features of `vector_features` are contained within it. + Args: + polygon_feature: Polygon feature that will be used to find which features of `vector_features` are contained within it. In this function, it is expected to be a GeoSeries, so a single row from a GeoDataFrame. - vector_features - vector_features - The dataframe containing the features that will be grouped by polygon_feature. + vector_features: The dataframe containing the features that will be grouped by polygon_feature. + polygon_feature: + vector_features: - Returns - ------- - GeoSeries representing the selected features from `vector_features` + Returns: """ contained_features = vector_features[vector_features.within(polygon_feature.geometry)] return contained_features @@ -438,21 +433,16 @@ def add_and_fill_contained_column( The purpose of this function is to first do a spatial search operation on which `vector_features` are within `polygon_feature`, and then write the contents found in the `polygon_column_name` to the selected `vector_features` - Parameters - ---------- - polygon_feature - Polygon feature that will be used to find which features of `vector_features` are contained within it - polygon_column_name - The name of the column in `polygon_feature` that contains the name/id of each polygon to be written to - `vector_features`. - vector_features - The dataframe containing the features that will be grouped by polygon_feature. - vector_column_name - The name of the column in `vector_features` that will the name/id of each polygon. - logger - - Returns - ------- + Args: + polygon_feature: Polygon feature that will be used to find which features of `vector_features` are contained + within it. + polygon_column_name: The name of the column in `polygon_feature` that contains the name/id of each polygon to + be written to `vector_features`. + vector_features: The dataframe containing the features that will be grouped by polygon_feature. + vector_column_name: The name of the column in `vector_features` that will the name/id of each polygon. + logger: (Default value = LOGGER) + + Returns: """ feature_name = polygon_feature[polygon_column_name] logger.info(f"Selecting all vector features that are within {feature_name}") @@ -485,22 +475,20 @@ def find_and_write_all_contained_features( "within" spatial operator. Each feature in `vector_features` will have a list of all the polygons that contain it (contain as being completely within the polygon). - Parameters - ---------- - polygon_features - Dataframes containing polygons. Will be used to find which features of `vector_features` + Args: + polygon_features: Dataframes containing polygons. Will be used to find which features of `vector_features` are contained within which polygon - polygon_column - The name of the column in `polygon_features` that contains the name/id + polygon_column: The name of the column in `polygon_features` that contains the name/id of each polygon. 
- vector_features - The dataframe containing the features that will be grouped by polygon. - vector_column_name - The name of the column in `vector_features` that will the name/id of each polygon. - logger - - Returns - ------- + vector_features: The dataframe containing the features that will be grouped by polygon. + vector_column_name: The name of the column in `vector_features` that will the name/id of each polygon. + logger: (Default value = LOGGER) + polygon_features: gpd.GeoDataFrame: + polygon_column: str: + vector_features: gpd.GeoDataFrame: + vector_column_name: str: + + Returns: """ if vector_column_name not in vector_features.columns: vector_features[vector_column_name] = [set() for _ in range(len(vector_features))] @@ -537,25 +525,24 @@ def spatial_join_within( It does a spatial join based on a within operation between features to associate which `vector_features` are within which `polygon_features`, groups the results by vector feature - Parameters - ---------- - polygon_features - Dataframes containing polygons. Will be used to find which features of `vector_features` + Args: + polygon_features: Dataframes containing polygons. Will be used to find which features of `vector_features` are contained within which polygon - polygon_column - The name of the column in `polygon_features` that contains the name/id + polygon_column: The name of the column in `polygon_features` that contains the name/id of each polygon. - vector_features - The dataframe containing the features that will be grouped by polygon. - vector_column_name - The name of the column in `vector_features` that will contain the name/id of each polygon. - join_type - predicate - The predicate to use for the spatial join operation. Defaults to `within`. - logger - Logger instance - Returns - ------- + vector_features: The dataframe containing the features that will be grouped by polygon. + vector_column_name: The name of the column in `vector_features` that will contain the name/id of each polygon. + join_type: + predicate: The predicate to use for the spatial join operation. Defaults to `within`. 
+ logger: (Default value = LOGGER) + polygon_features: gpd.GeoDataFrame: + polygon_column: str: + vector_features: gpd.GeoDataFrame: + vector_column_name: str: + join_type: str: (Default value = "left") + predicate: str: (Default value = "within") + + Returns: """ temp_feature_id = "feature_id" uuid_suffix = str(uuid.uuid4()) From e22aa11560aab49fee07f0312b68e34de4806473 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 24 Sep 2025 14:09:50 -0400 Subject: [PATCH 04/12] Add and configure API generation --- docs/mkdocs/css/extra.css | 0 docs/mkdocs/scripts/gen_ref_pages.py | 39 ++++++++ mkdocs.yml | 48 ++++++++-- poetry.lock | 136 +++++++++++++++++++++++++-- pyproject.toml | 8 +- 5 files changed, 217 insertions(+), 14 deletions(-) create mode 100644 docs/mkdocs/css/extra.css create mode 100644 docs/mkdocs/scripts/gen_ref_pages.py diff --git a/docs/mkdocs/css/extra.css b/docs/mkdocs/css/extra.css new file mode 100644 index 0000000..e69de29 diff --git a/docs/mkdocs/scripts/gen_ref_pages.py b/docs/mkdocs/scripts/gen_ref_pages.py new file mode 100644 index 0000000..6760787 --- /dev/null +++ b/docs/mkdocs/scripts/gen_ref_pages.py @@ -0,0 +1,39 @@ +"""Generate the code reference pages.""" + +from pathlib import Path + +import mkdocs_gen_files + +nav = mkdocs_gen_files.Nav() + +root = Path(__file__).parent.parent.parent.parent +src = root / "geospatial_tools" + +for path in sorted(src.rglob("*.py")): + module_path = path.relative_to(src).with_suffix("") + doc_path = path.relative_to(src).with_suffix(".md") + full_doc_path = Path("reference", doc_path) + + parts = tuple(module_path.parts) + + if not parts: + continue + if parts[-1] == "__init__": + parts = parts[:-1] + if not parts: + continue + doc_path = doc_path.with_name("index.md") + full_doc_path = full_doc_path.with_name("index.md") + elif parts[-1] == "__main__": + continue + + nav[parts] = doc_path.as_posix() + + with mkdocs_gen_files.open(full_doc_path, "w") as fd: + ident = ".".join(parts) + fd.write(f"::: {ident}") + + mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root)) + +with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file: + nav_file.writelines(nav.build_literate_nav()) diff --git a/mkdocs.yml b/mkdocs.yml index 036851b..69ced80 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,43 +1,79 @@ site_name: Geospatial Tools docs_dir: docs/mkdocs - nav: - User Guide: - sections/user-guide/user-guide.md - sections/user-guide/notebook-examples.md - - Developer Guide: - - sections/dev-guide/developer-guide.md + - Developer Guide: reference/ theme: name: material + features: + - content.code.copy + - toc.follow + - toc.integrate + - navigation.tracking + palette: - scheme: slate + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to system preference markdown_extensions: - admonition + - toc: + permalink: "#" + toc_depth: 3 extra_css: - css/extra.css plugins: - search + - gen-files: + scripts: + - docs/mkdocs/scripts/gen_ref_pages.py + - literate-nav + - section-index - mkdocstrings: default_handler: python handlers: python: paths: [geospatial_tools] options: + inherited_members: true docstring_style: google members_order: source annotations_path: brief 
show_docstring_attributes: true modernize_annotations: true show_source: true - show_submodules: false + show_submodules: true separate_signature: true signature_crossrefs: true show_signature_annotations: true - allow_inspection: false + allow_inspection: true + show_symbol_type_heading: false + show_symbol_type_toc: true + line_length: 88 + merge_init_into_class: true + diff --git a/poetry.lock b/poetry.lock index 40ea8c2..595cb58 100644 --- a/poetry.lock +++ b/poetry.lock @@ -274,7 +274,7 @@ version = "2.17.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" -groups = ["lab"] +groups = ["dev", "lab"] files = [ {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, @@ -283,6 +283,26 @@ files = [ [package.extras] dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] +[[package]] +name = "backrefs" +version = "5.9" +description = "A wrapper around re and regex that adds additional back references." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "backrefs-5.9-py310-none-any.whl", hash = "sha256:db8e8ba0e9de81fcd635f440deab5ae5f2591b54ac1ebe0550a2ca063488cd9f"}, + {file = "backrefs-5.9-py311-none-any.whl", hash = "sha256:6907635edebbe9b2dc3de3a2befff44d74f30a4562adbb8b36f21252ea19c5cf"}, + {file = "backrefs-5.9-py312-none-any.whl", hash = "sha256:7fdf9771f63e6028d7fee7e0c497c81abda597ea45d6b8f89e8ad76994f5befa"}, + {file = "backrefs-5.9-py313-none-any.whl", hash = "sha256:cc37b19fa219e93ff825ed1fed8879e47b4d89aa7a1884860e2db64ccd7c676b"}, + {file = "backrefs-5.9-py314-none-any.whl", hash = "sha256:df5e169836cc8acb5e440ebae9aad4bf9d15e226d3bad049cf3f6a5c20cc8dc9"}, + {file = "backrefs-5.9-py39-none-any.whl", hash = "sha256:f48ee18f6252b8f5777a22a00a09a85de0ca931658f1dd96d4406a34f3748c60"}, + {file = "backrefs-5.9.tar.gz", hash = "sha256:808548cb708d66b82ee231f962cb36faaf4f2baab032f2fbb783e9c2fdddaa59"}, +] + +[package.extras] +extras = ["regex"] + [[package]] name = "beautifulsoup4" version = "4.13.4" @@ -439,7 +459,7 @@ version = "2025.4.26" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" -groups = ["main", "lab"] +groups = ["main", "dev", "lab"] files = [ {file = "certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3"}, {file = "certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6"}, @@ -1578,7 +1598,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" -groups = ["main", "lab"] +groups = ["main", "dev", "lab"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -2706,6 +2726,21 @@ Markdown = ">=3.3" markupsafe = ">=2.0.1" mkdocs = ">=1.1" +[[package]] +name = "mkdocs-gen-files" +version = "0.5.0" +description = "MkDocs plugin to programmatically generate documentation pages during the build" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "mkdocs_gen_files-0.5.0-py3-none-any.whl", hash = "sha256:7ac060096f3f40bd19039e7277dd3050be9a453c8ac578645844d4d91d7978ea"}, + {file = "mkdocs_gen_files-0.5.0.tar.gz", hash = "sha256:4c7cf256b5d67062a788f6b1d035e157fc1a9498c2399be9af5257d4ff4d19bc"}, +] + +[package.dependencies] +mkdocs = ">=1.0.3" + [[package]] name = "mkdocs-get-deps" version = "0.2.0" @@ -2723,6 +2758,79 @@ mergedeep = ">=1.3.4" platformdirs = ">=2.2.0" pyyaml = ">=5.1" +[[package]] +name = "mkdocs-literate-nav" +version = "0.6.2" +description = "MkDocs plugin to specify the navigation in Markdown instead of YAML" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocs_literate_nav-0.6.2-py3-none-any.whl", hash = "sha256:0a6489a26ec7598477b56fa112056a5e3a6c15729f0214bea8a4dbc55bd5f630"}, + {file = "mkdocs_literate_nav-0.6.2.tar.gz", hash = "sha256:760e1708aa4be86af81a2b56e82c739d5a8388a0eab1517ecfd8e5aa40810a75"}, +] + +[package.dependencies] +mkdocs = ">=1.4.1" + +[[package]] +name = "mkdocs-material" +version = "9.6.20" +description = "Documentation that simply works" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs_material-9.6.20-py3-none-any.whl", hash = "sha256:b8d8c8b0444c7c06dd984b55ba456ce731f0035c5a1533cc86793618eb1e6c82"}, + {file = "mkdocs_material-9.6.20.tar.gz", hash = "sha256:e1f84d21ec5fb730673c4259b2e0d39f8d32a3fef613e3a8e7094b012d43e790"}, +] + +[package.dependencies] +babel = ">=2.10,<3.0" +backrefs = ">=5.7.post1,<6.0" +click = "<8.2.2" +colorama = ">=0.4,<1.0" +jinja2 = ">=3.1,<4.0" +markdown = ">=3.2,<4.0" +mkdocs = ">=1.6,<2.0" +mkdocs-material-extensions = ">=1.3,<2.0" +paginate = ">=0.5,<1.0" +pygments = ">=2.16,<3.0" +pymdown-extensions = ">=10.2,<11.0" +requests = ">=2.26,<3.0" + +[package.extras] +git = ["mkdocs-git-committers-plugin-2 (>=1.1,<3)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"] +imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=10.2,<12.0)"] +recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"] + +[[package]] +name = "mkdocs-material-extensions" +version = "1.3.1" +description = "Extension pack for Python Markdown and MkDocs Material." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"}, + {file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"}, +] + +[[package]] +name = "mkdocs-section-index" +version = "0.3.10" +description = "MkDocs plugin to allow clickable sections that lead to an index page" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocs_section_index-0.3.10-py3-none-any.whl", hash = "sha256:bc27c0d0dc497c0ebaee1fc72839362aed77be7318b5ec0c30628f65918e4776"}, + {file = "mkdocs_section_index-0.3.10.tar.gz", hash = "sha256:a82afbda633c82c5568f0e3b008176b9b365bf4bd8b6f919d6eff09ee146b9f8"}, +] + +[package.dependencies] +mkdocs = ">=1.2" + [[package]] name = "mkdocstrings" version = "0.30.0" @@ -3168,6 +3276,22 @@ files = [ {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] +[[package]] +name = "paginate" +version = "0.5.7" +description = "Divides large result sets into pages for easier browsing" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, + {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, +] + +[package.extras] +dev = ["pytest", "tox"] +lint = ["black"] + [[package]] name = "pandas" version = "2.2.3" @@ -4553,7 +4677,7 @@ version = "2.32.5" description = "Python HTTP for Humans." optional = false python-versions = ">=3.9" -groups = ["main", "lab"] +groups = ["main", "dev", "lab"] files = [ {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"}, {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"}, @@ -5294,7 +5418,7 @@ version = "2.4.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" -groups = ["main", "lab"] +groups = ["main", "dev", "lab"] files = [ {file = "urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813"}, {file = "urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466"}, @@ -5530,4 +5654,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "f6dab5d9fbfcc1e7f3f109218c5a20aa9eb487feade9079d733466e8a588a639" +content-hash = "5f2f52f741a0c7b628f55917c4ba39732064fe904bd7c9fe79ab67087d94982a" diff --git a/pyproject.toml b/pyproject.toml index f15fc1a..a48b9de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,10 @@ mdformat-gfm-alerts = "^2.0.0" mkdocs = "^1.6.1" mkdocstrings-python = "^1.18.2" pyment = "^0.3.3" +mkdocs-material = "^9.6.20" +mkdocs-gen-files = "^0.5.0" +mkdocs-literate-nav = "^0.6.2" +mkdocs-section-index = "^0.3.10" [tool.poetry.group.lab.dependencies] jupyterlab = "^4.0.10" @@ -158,7 +162,7 @@ transform-concats = true verbose = true [tool.docformatter] -style = "numpy" +style = "google" pre-summary-newline = true wrap-descriptions = 120 wrap-summaries = 120 @@ -218,7 +222,7 @@ ignore = [ ] [tool.ruff.lint.pydocstyle] -convention = "numpy" # Corresponds to flake8's docstring-convention and docformatter style. +convention = "google" [tool.ruff.lint.pylint] max-args = 16 From 186c08a32cc078a55e9ce65d15d849088421c189 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 24 Sep 2025 14:10:03 -0400 Subject: [PATCH 05/12] Update existing docs --- docs/mkdocs/sections/dev-guide/developer-guide.md | 3 +-- docs/mkdocs/sections/user-guide/notebook-examples.md | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/mkdocs/sections/dev-guide/developer-guide.md b/docs/mkdocs/sections/dev-guide/developer-guide.md index 8291c26..c213c80 100644 --- a/docs/mkdocs/sections/dev-guide/developer-guide.md +++ b/docs/mkdocs/sections/dev-guide/developer-guide.md @@ -1,2 +1 @@ -# Stac -::: geospatial_tools.stac +# Dev Guide \ No newline at end of file diff --git a/docs/mkdocs/sections/user-guide/notebook-examples.md b/docs/mkdocs/sections/user-guide/notebook-examples.md index bd2b320..353134c 100644 --- a/docs/mkdocs/sections/user-guide/notebook-examples.md +++ b/docs/mkdocs/sections/user-guide/notebook-examples.md @@ -3,4 +3,4 @@ There are a few notebook examples available. 
 - [How to use STAC API](https://github.com/RolnickLab/geospatial-tools/blob/main/notebooks/stac_api_tools.ipynb)
-- [Exploring Sentinel 2 data from Planetary Computer](https://github.com/RolnickLab/geospatial-tools/blob/main/notebooks/planetary_computer_sentinel2_exploration.ipynb)
\ No newline at end of file
+- [Exploring Sentinel 2 data from Planetary Computer](https://github.com/RolnickLab/geospatial-tools/blob/main/notebooks/planetary_computer_sentinel2_exploration.ipynb)

From 8fd350f1950e03e4ba58ad7635580c9aa0f54560 Mon Sep 17 00:00:00 2001
From: f-PLT
Date: Wed, 24 Sep 2025 14:33:21 -0400
Subject: [PATCH 06/12] Add base index file for mkdocs

---
 docs/mkdocs/index.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 docs/mkdocs/index.md

diff --git a/docs/mkdocs/index.md b/docs/mkdocs/index.md
new file mode 100644
index 0000000..4b2e1bf
--- /dev/null
+++ b/docs/mkdocs/index.md
@@ -0,0 +1 @@
+# Geospatial tools
\ No newline at end of file

From 75ba39fdef5860c86580d7492bbe2aedda4fc74d Mon Sep 17 00:00:00 2001
From: f-PLT
Date: Wed, 24 Sep 2025 14:47:49 -0400
Subject: [PATCH 07/12] Remove docstring rule from pre-commit

---
 .pre-commit-config.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5057701..55db2f2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -12,7 +12,6 @@ repos:
       - id: debug-statements
       - id: check-builtin-literals
       - id: check-case-conflict
-      - id: check-docstring-first
       - id: detect-private-key
       - id: check-added-large-files
         args: ["--maxkb=5000"]

From c2921afe510d15eaac741ecbce2dbcd047a8fe92 Mon Sep 17 00:00:00 2001
From: f-PLT
Date: Wed, 24 Sep 2025 14:48:43 -0400
Subject: [PATCH 08/12] Add geotypes docstrings and add details to mkdocs
 config

---
 geospatial_tools/geotools_types.py | 5 +++++
 mkdocs.yml                         | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/geospatial_tools/geotools_types.py b/geospatial_tools/geotools_types.py
index 4039ec4..b366699 100644
--- a/geospatial_tools/geotools_types.py
+++ b/geospatial_tools/geotools_types.py
@@ -14,7 +14,11 @@
 )
 
 BBoxLike = tuple[float, float, float, float]
+"""BBox-like tuple structure used for type checking."""
+
 IntersectsLike = Union[Point, Polygon, LineString, MultiPolygon, MultiPoint, MultiLineString, GeometryCollection]
+"""Intersect-like union of types used for type checking."""
+
 DateLike = Union[
     datetime,
     str,
@@ -23,3 +27,4 @@
     list[Union[datetime, str, None]],
     Iterator[Union[datetime, str, None]],
 ]
+"""Date-like union of types used for type checking."""

diff --git a/mkdocs.yml b/mkdocs.yml
index 69ced80..847b9bc 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,4 +1,6 @@
 site_name: Geospatial Tools
+site_description: A package containing classes and utilities to help with geospatial data
+repo_url: https://github.com/RolnickLab/geospatial-tools
 docs_dir: docs/mkdocs
 
 nav:
@@ -13,7 +15,6 @@ theme:
   features:
     - content.code.copy
     - toc.follow
-    - toc.integrate
     - navigation.tracking
 
 palette:

From 26adce1ede52ffb13e475987ccf611079e851dd0 Mon Sep 17 00:00:00 2001
From: f-PLT
Date: Wed, 24 Sep 2025 14:58:50 -0400
Subject: [PATCH 09/12] Fill in index.md

---
 docs/mkdocs/index.md | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/docs/mkdocs/index.md b/docs/mkdocs/index.md
index 4b2e1bf..d9dd7d9 100644
--- a/docs/mkdocs/index.md
+++ b/docs/mkdocs/index.md
@@ -1 +1,26 @@
-# Geospatial tools
\ No newline at end of file
+# Geospatial tools
+
+## Description
+
+This repository is a collection of
tools and scripts for geospatial use cases.
+
+For more detailed information on how to install, configure, and develop a project
+using this repository, please refer to the project's
+[README](https://github.com/RolnickLab/geospatial-tools/blob/main/README.md)
+
+## Requirements
+
+This project has only been tested in a Linux (Debian-based) environment and assumes
+some basic tools for development are already installed.
+
+The project uses a Makefile to automate most operations. If `make` is available on your
+machine, there's a good chance this will work.
+
+## Python Version
+
+This project uses Python version 3.11.
+
+## Build Tool
+
+This project uses [`poetry`](https://python-poetry.org/) as a build tool. Using a build tool has the advantage of
+streamlining script use as well as fixing path issues related to imports.

From ec2f985b6ff9647811bf7a11a77a8d11f2a957fe Mon Sep 17 00:00:00 2001
From: f-PLT
Date: Wed, 24 Sep 2025 15:06:49 -0400
Subject: [PATCH 10/12] Add mkdocs targets and publishing workflow

---
 .github/workflows/publish-gh-pages.yml | 52 ++++++++++++++++++++++++++
 Makefile.targets                       | 32 +++++++++++++++-
 2 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/publish-gh-pages.yml

diff --git a/.github/workflows/publish-gh-pages.yml b/.github/workflows/publish-gh-pages.yml
new file mode 100644
index 0000000..463ace9
--- /dev/null
+++ b/.github/workflows/publish-gh-pages.yml
@@ -0,0 +1,52 @@
+name: Publish gh-pages
+
+on:
+  push:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: write
+
+jobs:
+  Publish-gh-pages:
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install Poetry
+        run: |
+          pip install poetry
+
+      - name: Cache Poetry virtualenv and dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.cache/pip
+          key: poetry-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            poetry-${{ runner.os }}-
+
+      - name: Install dependencies
+        run: |
+          make install
+
+      - name: Run mkdocs deploy
+        run: |
+          poetry run mkdocs gh-deploy
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/Makefile.targets b/Makefile.targets
index e4f8a33..7db8e8d 100644
--- a/Makefile.targets
+++ b/Makefile.targets
@@ -5,4 +5,34 @@
 
 .PHONY: test-notebooks
 test-notebooks: ## Execute test notebooks using pytest and nbval
-	$(ENV_COMMAND_TOOL) nox -s test_nb
\ No newline at end of file
+	$(ENV_COMMAND_TOOL) nox -s test_nb
+
+## -- Docs targets -------------------------------------------------------------------------------------------------- ##
+.PHONY: preview-docs
+preview-docs: ## Preview the documentation site locally
+	@poetry run mkdocs serve -a 0.0.0.0:7000
+
+
+.PHONY: build-docs
+build-docs: ## Build the documentation files locally
+	@poetry run mkdocs build
+
+.PHONY: deploy-docs
+deploy-docs: ## Publish and deploy the documentation to the live Github page
+	@echo""; \
+	echo -e "\e[1;39;41m-- WARNING --\e[0m This command will deploy all current changes to the live Github page - Making it publicly available"; \
+	echo""; \
+	echo -n "Would you like to deploy the docs? 
[Y/n]: "; \ + read ans; \ + case $$ans in \ + [Yy]*) \ + echo""; \ + poetry run mkdocs gh-deploy; \ + echo""; \ + ;; \ + *) \ + echo""; \ + echo "Skipping publication to Github Pages."; \ + echo " "; \ + ;; \ + esac; \ \ No newline at end of file From 0f378f85563234dbf7ed23cc40f414a2b235c126 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 24 Sep 2025 15:13:38 -0400 Subject: [PATCH 11/12] Fix Pylint errors --- .../planetary_computer/sentinel_2.py | 6 +++--- geospatial_tools/stac.py | 16 +++++++++------- geospatial_tools/vector.py | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/geospatial_tools/planetary_computer/sentinel_2.py b/geospatial_tools/planetary_computer/sentinel_2.py index 1e60c61..98b0339 100644 --- a/geospatial_tools/planetary_computer/sentinel_2.py +++ b/geospatial_tools/planetary_computer/sentinel_2.py @@ -72,7 +72,7 @@ def __init__( @property def max_cloud_cover(self): - """""" + """Max % of cloud cover used for Sentinel 2 product search.""" return self._max_cloud_cover @max_cloud_cover.setter @@ -90,7 +90,7 @@ def max_cloud_cover(self, max_cloud_cover: int): @property def date_ranges(self): - """""" + """Date range used to search for Sentinel 2 products.""" return self._date_ranges @date_ranges.setter @@ -181,7 +181,7 @@ def find_best_complete_products(self, max_cloud_cover: int | None = None, max_no return self.successful_results def select_best_products_per_feature(self) -> GeoDataFrame: - """""" + """Return a GeoDataFrame containing the best products for each Sentinel 2 tile.""" spatial_join_results = spatial_join_within( polygon_features=self.sentinel2_tiling_grid, polygon_column=self.sentinel2_tiling_grid_column, diff --git a/geospatial_tools/stac.py b/geospatial_tools/stac.py index 174b407..94b6339 100644 --- a/geospatial_tools/stac.py +++ b/geospatial_tools/stac.py @@ -91,7 +91,11 @@ def list_available_catalogs(logger: logging.Logger = LOGGER) -> frozenset[str]: class AssetSubItem: - """""" + """ + Class that represent a STAC asset sub item. + + Generally represents a single satellite image band. 
+ """ def __init__(self, asset, item_id: str, band: str, filename: str | Path): """ @@ -111,7 +115,7 @@ def __init__(self, asset, item_id: str, band: str, filename: str | Path): class Asset: - """""" + """Represents a STAC asset.""" def __init__( self, @@ -240,7 +244,7 @@ def reproject_merged_asset( return None def delete_asset_sub_items(self): - """""" + """Delete all asset sub items that belong to this asset.""" self.logger.info(f"Deleting asset sub items from asset [{self.asset_id}]") if self.list: for item in self.list: @@ -248,24 +252,22 @@ def delete_asset_sub_items(self): item.filename.unlink() def delete_merged_asset(self): - """""" + """Delete merged asset.""" self.logger.info(f"Deleting merged asset file for [{self.merged_asset_path}]") self.merged_asset_path.unlink() def delete_reprojected_asset(self): - """""" + """Delete reprojected asset.""" self.logger.info(f"Deleting reprojected asset file for [{self.reprojected_asset_path}]") self.reprojected_asset_path.unlink() def _create_merged_asset_metadata(self): - """""" self.logger.info("Creating merged asset metadata") file_list = [asset.filename for asset in self.list] meta = create_merged_raster_bands_metadata(file_list) return meta def _get_asset_total_bands(self): - """""" downloaded_file_list = [asset.filename for asset in self.list] total_band_count = get_total_band_count(downloaded_file_list) return total_band_count diff --git a/geospatial_tools/vector.py b/geospatial_tools/vector.py index 2c0475f..46d8eba 100644 --- a/geospatial_tools/vector.py +++ b/geospatial_tools/vector.py @@ -412,8 +412,8 @@ def select_all_within_feature(polygon_feature: gpd.GeoSeries, vector_features: g This function is quite small and simple, but exists mostly as a. Args: - polygon_feature: Polygon feature that will be used to find which features of `vector_features` are contained within it. - In this function, it is expected to be a GeoSeries, so a single row from a GeoDataFrame. + polygon_feature: Polygon feature that will be used to find which features of `vector_features` are contained + within it. In this function, it is expected to be a GeoSeries, so a single row from a GeoDataFrame. vector_features: The dataframe containing the features that will be grouped by polygon_feature. 
polygon_feature: vector_features: From 7bc0de20a82ee1595c91f31beee53699fae692d3 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 24 Sep 2025 15:16:41 -0400 Subject: [PATCH 12/12] Only run checks on Pull Requests, not Pushes --- .github/workflows/lint.yml | 2 +- .github/workflows/pre-commit.yml | 2 +- .github/workflows/test.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 01a0710..6d667a6 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,6 +1,6 @@ name: Lint -on: [pull_request, push] +on: [pull_request] concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 5036281..e14156f 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,6 +1,6 @@ name: Pre-commit -on: [pull_request, push] +on: [pull_request] concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4f0820f..352cb2c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,6 @@ name: Test -on: [pull_request, push] +on: [pull_request] concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}