From b1773fb15b667d5e51a4a448e219a51d337170e5 Mon Sep 17 00:00:00 2001 From: Cole McIntosh Date: Sun, 31 May 2026 13:17:58 -0500 Subject: [PATCH] Clean up nemo-retriever dependencies Signed-off-by: Cole McIntosh --- nemo_retriever/pyproject.toml | 20 +++- nemo_retriever/uv.lock | 178 +++++++++++++++++++++++++++++++--- 2 files changed, 177 insertions(+), 21 deletions(-) diff --git a/nemo_retriever/pyproject.toml b/nemo_retriever/pyproject.toml index 355f23f58..1a5425e71 100644 --- a/nemo_retriever/pyproject.toml +++ b/nemo_retriever/pyproject.toml @@ -52,26 +52,34 @@ dependencies = [ # HTTP clients "httpx>=0.27.0", "requests>=2.32.5", + "aiohttp>=3.12.0", "urllib3>=2.7.0", # Utilities "pydantic>=2.8.0", "rich>=13.7.0", "universal-pathlib>=0.2.0", "numpy>=1.26.0", + "python-dateutil>=2.9.0", "debugpy>=1.8.0", "fsspec>=2025.5.1", "s3fs>=2025.5.1", + "fastparquet>=2024.11.0,<2026", # Core ingest packages # Document parsing and NIM client libs "pypdfium2==4.30.0", "pillow==12.2.0", + "opencv-python-headless>=4.8.0", + "scikit-learn>=1.6.0", + "scipy>=1.11.0", "nltk>=3.9.4", "markitdown", "langchain-nvidia-ai-endpoints>=0.3.0", + "unstructured-client", # Default VDB solution "lancedb", # gRPC client for Parakeet/Riva ASR. Required for ASRCPUActor when it # targets the public NVCF Parakeet endpoint (the default) or any remote NIM. + "grpcio", "nvidia-riva-client>=2.25.1", ] @@ -94,7 +102,6 @@ service = [ "glom", "easydict", "addict", - "scikit-learn>=1.6.0", "psutil>=5.9.0", "apscheduler>=3.10", # Audio resampling used by ParakeetClient @@ -109,7 +116,6 @@ local = [ "transformers>=4.57.6,<5", "tokenizers>=0.21.1", "accelerate==1.12.0", - "opencv-python-headless>=4.8.0", "torch==2.11.0; sys_platform == 'linux'", "torch==2.11.0; sys_platform == 'win32'", "torch==2.11.0; sys_platform == 'darwin'", @@ -120,7 +126,6 @@ local = [ "einops", "easydict", "addict", - "scikit-learn>=1.6.0", "timm==1.0.22", "albumentations==2.0.8", "nemotron-page-elements-v3>=0.dev0", @@ -141,11 +146,10 @@ local = [ ] # ── Multimedia — audio/ASR and SVG rendering ──────────────────────────────── -# soundfile + scipy enable local Parakeet ASR on audio/video content. +# soundfile enables local Parakeet ASR on audio/video content. # cairosvg enables SVG-to-image rendering (requires libcairo system library). multimedia = [ "soundfile>=0.12.0", - "scipy>=1.11.0", "cairosvg>=2.7.0", "librosa>=0.10.2", ] @@ -189,6 +193,12 @@ dev = [ "pytest>=8.0.2", ] +test = [ + # MoviePy 2.x caps Pillow below 12; core currently pins Pillow 12.2.0. + "moviepy<2", + "pytest>=8.0.2", +] + # ── Convenience: full install ───────────────────────────────────────────────── all = [ "nemo_retriever[service,local,multimedia,nemotron-parse,tabular,benchmarks,llm]", diff --git a/nemo_retriever/uv.lock b/nemo_retriever/uv.lock index aa7ab1fb8..3e9b65e7a 100644 --- a/nemo_retriever/uv.lock +++ b/nemo_retriever/uv.lock @@ -85,6 +85,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/25/da1f0b4dd970e52bf5a36c204c107e11a0c6d3ed195eba0bfbc664c312b2/aiofile-3.9.0-py3-none-any.whl", hash = "sha256:ce2f6c1571538cbdfa0143b04e16b208ecb0e9cb4148e528af8a640ed51cc8aa", size = 19539, upload-time = "2024-10-08T10:39:32.955Z" }, ] +[[package]] +name = "aiofiles" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" }, +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -605,6 +614,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" }, ] +[[package]] +name = "cramjam" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/14/12/34bf6e840a79130dfd0da7badfb6f7810b8fcfd60e75b0539372667b41b6/cramjam-2.11.0.tar.gz", hash = "sha256:5c82500ed91605c2d9781380b378397012e25127e89d64f460fea6aeac4389b4", size = 99100, upload-time = "2025-07-27T21:25:07.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/0d/7c84c913a5fae85b773a9dcf8874390f9d68ba0fcc6630efa7ff1541b950/cramjam-2.11.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:dba5c14b8b4f73ea1e65720f5a3fe4280c1d27761238378be8274135c60bbc6e", size = 3553368, upload-time = "2025-07-27T21:22:27.162Z" }, + { url = "https://files.pythonhosted.org/packages/2b/cc/4f6d185d8a744776f53035e72831ff8eefc2354f46ab836f4bd3c4f6c138/cramjam-2.11.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:11eb40722b3fcf3e6890fba46c711bf60f8dc26360a24876c85e52d76c33b25b", size = 1860014, upload-time = "2025-07-27T21:22:28.738Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a8/626c76263085c6d5ded0e71823b411e9522bfc93ba6cc59855a5869296e7/cramjam-2.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aeb26e2898994b6e8319f19a4d37c481512acdcc6d30e1b5ecc9d8ec57e835cb", size = 1693512, upload-time = "2025-07-27T21:22:30.999Z" }, + { url = "https://files.pythonhosted.org/packages/e9/52/0851a16a62447532e30ba95a80e638926fdea869a34b4b5b9d0a020083ba/cramjam-2.11.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f8d82081ed7d8fe52c982bd1f06e4c7631a73fe1fb6d4b3b3f2404f87dc40fe", size = 2025285, upload-time = "2025-07-27T21:22:32.954Z" }, + { url = "https://files.pythonhosted.org/packages/98/76/122e444f59dbc216451d8e3d8282c9665dc79eaf822f5f1470066be1b695/cramjam-2.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:092a3ec26e0a679305018380e4f652eae1b6dfe3fc3b154ee76aa6b92221a17c", size = 1761327, upload-time = "2025-07-27T21:22:34.484Z" }, + { url = "https://files.pythonhosted.org/packages/a3/bc/3a0189aef1af2b29632c039c19a7a1b752bc21a4053582a5464183a0ad3d/cramjam-2.11.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:529d6d667c65fd105d10bd83d1cd3f9869f8fd6c66efac9415c1812281196a92", size = 1854075, upload-time = "2025-07-27T21:22:36.157Z" }, + { url = "https://files.pythonhosted.org/packages/2e/80/8a6343b13778ce52d94bb8d5365a30c3aa951276b1857201fe79d7e2ad25/cramjam-2.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:555eb9c90c450e0f76e27d9ff064e64a8b8c6478ab1a5594c91b7bc5c82fd9f0", size = 2032710, upload-time = "2025-07-27T21:22:38.17Z" }, + { url = "https://files.pythonhosted.org/packages/df/6b/cd1778a207c29eda10791e3dfa018b588001928086e179fc71254793c625/cramjam-2.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5edf4c9e32493035b514cf2ba0c969d81ccb31de63bd05490cc8bfe3b431674e", size = 2068353, upload-time = "2025-07-27T21:22:39.615Z" }, + { url = "https://files.pythonhosted.org/packages/dc/f0/5c2a5cd5711032f3b191ca50cb786c17689b4a9255f9f768866e6c9f04d9/cramjam-2.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa2fe41f48c4d58d923803383b0737f048918b5a0d10390de9628bb6272b107", size = 1978104, upload-time = "2025-07-27T21:22:41.106Z" }, + { url = "https://files.pythonhosted.org/packages/f9/8b/b363a5fb2c3347504fe9a64f8d0f1e276844f0e532aa7162c061cd1ffee4/cramjam-2.11.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9ca14cf1cabdb0b77d606db1bb9e9ca593b1dbd421fcaf251ec9a5431ec449f3", size = 2030779, upload-time = "2025-07-27T21:22:42.969Z" }, + { url = "https://files.pythonhosted.org/packages/78/7b/d83dad46adb6c988a74361f81ad9c5c22642be53ad88616a19baedd06243/cramjam-2.11.0-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:309e95bf898829476bccf4fd2c358ec00e7ff73a12f95a3cdeeba4bb1d3683d5", size = 2155297, upload-time = "2025-07-27T21:22:44.6Z" }, + { url = "https://files.pythonhosted.org/packages/1a/be/60d9be4cb33d8740a4aa94c7513f2ef3c4eba4fd13536f086facbafade71/cramjam-2.11.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:86dca35d2f15ef22922411496c220f3c9e315d5512f316fe417461971cc1648d", size = 2169255, upload-time = "2025-07-27T21:22:46.534Z" }, + { url = "https://files.pythonhosted.org/packages/11/b0/4a595f01a243aec8ad272b160b161c44351190c35d98d7787919d962e9e5/cramjam-2.11.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:193c6488bd2f514cbc0bef5c18fad61a5f9c8d059dd56edf773b3b37f0e85496", size = 2155651, upload-time = "2025-07-27T21:22:48.46Z" }, + { url = "https://files.pythonhosted.org/packages/38/47/7776659aaa677046b77f527106e53ddd47373416d8fcdb1e1a881ec5dc06/cramjam-2.11.0-cp312-cp312-win32.whl", hash = "sha256:514e2c008a8b4fa823122ca3ecab896eac41d9aa0f5fc881bd6264486c204e32", size = 1603568, upload-time = "2025-07-27T21:22:50.084Z" }, + { url = "https://files.pythonhosted.org/packages/75/b1/d53002729cfd94c5844ddfaf1233c86d29f2dbfc1b764a6562c41c044199/cramjam-2.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:53fed080476d5f6ad7505883ec5d1ec28ba36c2273db3b3e92d7224fe5e463db", size = 1709287, upload-time = "2025-07-27T21:22:51.534Z" }, +] + [[package]] name = "cryptography" version = "48.0.0" @@ -808,11 +840,11 @@ wheels = [ [[package]] name = "decorator" -version = "5.2.1" +version = "4.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/93/84fa12f2dc341f8cf5f022ee09e109961055749df2d0c75c5f98746cfe6c/decorator-4.4.2.tar.gz", hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7", size = 33629, upload-time = "2020-02-29T05:24:43.312Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/72a1821152d07cf1d8b6fce298aeb06a7eb90f4d6d41acec9861e7cc6df0/decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760", size = 9239, upload-time = "2020-02-29T05:24:45.993Z" }, ] [[package]] @@ -1116,6 +1148,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/76/b310d52fa0e30d39bd937eb58ec2c1f1ea1b5f519f0575e9dd9612f01deb/fastmcp-3.2.4-py3-none-any.whl", hash = "sha256:e6c9c429171041455e47ab94bb3f83c4657622a0ec28922f6940053959bd58a9", size = 728599, upload-time = "2026-04-14T01:42:26.85Z" }, ] +[[package]] +name = "fastparquet" +version = "2025.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cramjam" }, + { name = "fsspec" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1e/ad/87f7f5750685e8e0a359d732c85332481ba9b5723af579f8755f81154d0b/fastparquet-2025.12.0.tar.gz", hash = "sha256:85f807d3846c7691855a68ed7ff6ee40654b72b997f5b1199e6310a1e19d1cd5", size = 480045, upload-time = "2025-12-18T16:22:22.016Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/b2/229a4482d80a737d0fe6706c4f93adb631f42ec5b0a2b154247d63bb48fe/fastparquet-2025.12.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:27b1cf0557ddddbf0e28db64d4d3bea1384be1d245b2cef280d001811e3600fe", size = 896986, upload-time = "2025-12-18T21:53:52.611Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c2/953117c43bf617379eff79ce8a2318ef49f7f41908faade051fa12281ac8/fastparquet-2025.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9356c59e48825d61719960ccb9ce799ad5cd1b04f2f13368f03fab1f3c645d1e", size = 687642, upload-time = "2025-12-18T21:54:13.594Z" }, + { url = "https://files.pythonhosted.org/packages/92/35/41deaa9a4fc9ab6c00f3b49afe56cbafee13a111032aa41f23d077b69ad6/fastparquet-2025.12.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c4c92e299a314d4b542dc881eeb4d587dc075c0a5a86c07ccf171d8852e9736d", size = 1764260, upload-time = "2025-12-18T21:58:11.197Z" }, + { url = "https://files.pythonhosted.org/packages/1a/0f/a229b3f699aaccc7b5ec3f5e21cff8aa99bc199499bff08cf38bc6ab52c6/fastparquet-2025.12.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4881dc91c7e6d1d08cda9968ed1816b0c66a74b1826014c26713cad923aaca71", size = 1810920, upload-time = "2025-12-18T21:57:31.514Z" }, + { url = "https://files.pythonhosted.org/packages/90/c2/ca76afca0c2debef368a42a701d501e696490e0a7138f0337709a724b189/fastparquet-2025.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d8d70d90614f19752919037c4a88aaaeda3cd7667aeb54857c48054e2a9e3588", size = 1819692, upload-time = "2025-12-18T21:58:43.095Z" }, + { url = "https://files.pythonhosted.org/packages/ab/41/f235c0d8171f6676b9d4fb8468c781fbe7bf90fed2c4383f2d8d82e574db/fastparquet-2025.12.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8e2ccf387f629cb11b72fec6f15a55e0f40759b47713124764a9867097bcd377", size = 1784357, upload-time = "2025-12-18T21:58:13.258Z" }, + { url = "https://files.pythonhosted.org/packages/29/7e/c86bf33b363cf5a1ad71d3ebd4a352131ba99566c78aa58d9e56c98526ba/fastparquet-2025.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1978e7f3c32044f2f7a0b35784240dfc3eaeb8065a879fa3011c832fea4e7037", size = 1815777, upload-time = "2025-12-18T21:58:44.432Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0b/769333ab6e6ed401755b550b3338cee96b8f6502db5da55312d86a97db62/fastparquet-2025.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:25e87fff63c011fe658a7547ba83355e02568db1ee26a65e6b75c2287701d5dc", size = 667555, upload-time = "2026-01-06T21:24:36.381Z" }, +] + [[package]] name = "fastsafetensors" version = "0.3.1" @@ -1565,6 +1620,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" }, ] +[[package]] +name = "imageio" +version = "2.37.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/84/93bcd1300216ea50811cee96873b84a1bebf8d0489ffaf7f2a3756bab866/imageio-2.37.3.tar.gz", hash = "sha256:bbb37efbfc4c400fcd534b367b91fcd66d5da639aaa138034431a1c5e0a41451", size = 389673, upload-time = "2026-03-09T11:31:12.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646, upload-time = "2026-03-09T11:31:10.771Z" }, +] + +[[package]] +name = "imageio-ffmpeg" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/bd/c3343c721f2a1b0c9fc71c1aebf1966a3b7f08c2eea8ed5437a2865611d6/imageio_ffmpeg-0.6.0.tar.gz", hash = "sha256:e2556bed8e005564a9f925bb7afa4002d82770d6b08825078b7697ab88ba1755", size = 25210, upload-time = "2025-01-16T21:34:32.747Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/58/87ef68ac83f4c7690961bce288fd8e382bc5f1513860fc7f90a9c1c1c6bf/imageio_ffmpeg-0.6.0-py3-none-macosx_10_9_intel.macosx_10_9_x86_64.whl", hash = "sha256:9d2baaf867088508d4a3458e61eeb30e945c4ad8016025545f66c4b5aaef0a61", size = 24932969, upload-time = "2025-01-16T21:34:20.464Z" }, + { url = "https://files.pythonhosted.org/packages/40/5c/f3d8a657d362cc93b81aab8feda487317da5b5d31c0e1fdfd5e986e55d17/imageio_ffmpeg-0.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b1ae3173414b5fc5f538a726c4e48ea97edc0d2cdc11f103afee655c463fa742", size = 21113891, upload-time = "2025-01-16T21:34:00.277Z" }, + { url = "https://files.pythonhosted.org/packages/33/e7/1925bfbc563c39c1d2e82501d8372734a5c725e53ac3b31b4c2d081e895b/imageio_ffmpeg-0.6.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1d47bebd83d2c5fc770720d211855f208af8a596c82d17730aa51e815cdee6dc", size = 25632706, upload-time = "2025-01-16T21:33:53.475Z" }, + { url = "https://files.pythonhosted.org/packages/a0/2d/43c8522a2038e9d0e7dbdf3a61195ecc31ca576fb1527a528c877e87d973/imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c7e46fcec401dd990405049d2e2f475e2b397779df2519b544b8aab515195282", size = 29498237, upload-time = "2025-01-16T21:34:13.726Z" }, + { url = "https://files.pythonhosted.org/packages/a0/13/59da54728351883c3c1d9fca1710ab8eee82c7beba585df8f25ca925f08f/imageio_ffmpeg-0.6.0-py3-none-win32.whl", hash = "sha256:196faa79366b4a82f95c0f4053191d2013f4714a715780f0ad2a68ff37483cc2", size = 19652251, upload-time = "2025-01-16T21:34:06.812Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824, upload-time = "2025-01-16T21:34:28.6Z" }, +] + [[package]] name = "importlib-metadata" version = "8.7.1" @@ -2348,6 +2430,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/98/6af411189d9413534c3eb691182bff1f5c6d44ed2f93f2edfe52a1bbceb8/more_itertools-11.0.2-py3-none-any.whl", hash = "sha256:6e35b35f818b01f691643c6c611bc0902f2e92b46c18fffa77ae1e7c46e912e4", size = 71939, upload-time = "2026-04-09T15:01:32.21Z" }, ] +[[package]] +name = "moviepy" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "decorator" }, + { name = "imageio" }, + { name = "imageio-ffmpeg" }, + { name = "numpy" }, + { name = "proglog" }, + { name = "requests" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/54/01a8c4e35c75ca9724d19a7e4de9dc23f0ceb8769102c7de056113af61c3/moviepy-1.0.3.tar.gz", hash = "sha256:2884e35d1788077db3ff89e763c5ba7bfddbd7ae9108c9bc809e7ba58fa433f5", size = 388311, upload-time = "2020-05-07T16:27:46.856Z" } + [[package]] name = "mpmath" version = "1.3.0" @@ -2432,13 +2529,16 @@ wheels = [ name = "nemo-retriever" source = { editable = "." } dependencies = [ + { name = "aiohttp" }, { name = "backoff" }, { name = "click" }, { name = "debugpy" }, { name = "fastapi" }, { name = "fastmcp" }, + { name = "fastparquet" }, { name = "ffmpeg-python" }, { name = "fsspec" }, + { name = "grpcio" }, { name = "httpx" }, { name = "lancedb" }, { name = "langchain-nvidia-ai-endpoints" }, @@ -2446,21 +2546,26 @@ dependencies = [ { name = "nltk" }, { name = "numpy" }, { name = "nvidia-riva-client" }, + { name = "opencv-python-headless" }, { name = "pandas" }, { name = "pillow" }, { name = "prometheus-fastapi-instrumentator" }, { name = "pydantic" }, { name = "pypdfium2" }, + { name = "python-dateutil" }, { name = "python-multipart" }, { name = "pyyaml" }, { name = "ray", extra = ["data", "serve"] }, { name = "requests" }, { name = "rich" }, { name = "s3fs" }, + { name = "scikit-learn" }, + { name = "scipy" }, { name = "sqlglot" }, { name = "tqdm" }, { name = "typer" }, { name = "universal-pathlib" }, + { name = "unstructured-client" }, { name = "urllib3" }, { name = "uvicorn", extra = ["standard"] }, ] @@ -2490,12 +2595,8 @@ all = [ { name = "nemotron-table-structure-v1" }, { name = "neo4j" }, { name = "nvidia-ml-py" }, - { name = "nvidia-riva-client" }, { name = "open-clip-torch" }, - { name = "opencv-python-headless" }, { name = "psutil" }, - { name = "scikit-learn" }, - { name = "scipy" }, { name = "soundfile" }, { name = "timm" }, { name = "tokenizers" }, @@ -2533,9 +2634,7 @@ local = [ { name = "nemotron-page-elements-v3" }, { name = "nemotron-table-structure-v1" }, { name = "nvidia-ml-py" }, - { name = "opencv-python-headless" }, { name = "psutil" }, - { name = "scikit-learn" }, { name = "timm" }, { name = "tokenizers" }, { name = "torch", version = "2.11.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, @@ -2549,7 +2648,6 @@ local = [ multimedia = [ { name = "cairosvg" }, { name = "librosa" }, - { name = "scipy" }, { name = "soundfile" }, ] nemotron-parse = [ @@ -2563,7 +2661,6 @@ service = [ { name = "glom" }, { name = "librosa" }, { name = "psutil" }, - { name = "scikit-learn" }, ] tabular = [ { name = "duckdb" }, @@ -2571,12 +2668,17 @@ tabular = [ { name = "langgraph" }, { name = "neo4j" }, ] +test = [ + { name = "moviepy" }, + { name = "pytest" }, +] [package.metadata] requires-dist = [ { name = "accelerate", marker = "extra == 'local'", specifier = "==1.12.0" }, { name = "addict", marker = "extra == 'local'" }, { name = "addict", marker = "extra == 'service'" }, + { name = "aiohttp", specifier = ">=3.12.0" }, { name = "albumentations", marker = "extra == 'local'", specifier = "==2.0.8" }, { name = "apscheduler", marker = "extra == 'local'", specifier = ">=3.10" }, { name = "apscheduler", marker = "extra == 'service'", specifier = ">=3.10" }, @@ -2594,12 +2696,14 @@ requires-dist = [ { name = "einops", marker = "extra == 'local'" }, { name = "fastapi", specifier = ">=0.114.0" }, { name = "fastmcp", specifier = ">=2.0.0" }, + { name = "fastparquet", specifier = ">=2024.11.0,<2026" }, { name = "ffmpeg-python" }, { name = "flashinfer-cubin", marker = "sys_platform == 'linux' and extra == 'local'", specifier = "==0.6.8.post1" }, { name = "flashinfer-python", marker = "sys_platform == 'linux' and extra == 'local'", specifier = "==0.6.8.post1" }, { name = "fsspec", specifier = ">=2025.5.1" }, { name = "glom", marker = "extra == 'local'" }, { name = "glom", marker = "extra == 'service'" }, + { name = "grpcio" }, { name = "httpx", specifier = ">=0.27.0" }, { name = "lancedb" }, { name = "langchain-nvidia-ai-endpoints", specifier = ">=0.3.0" }, @@ -2608,6 +2712,7 @@ requires-dist = [ { name = "librosa", marker = "extra == 'service'", specifier = ">=0.10.2" }, { name = "litellm", marker = "extra == 'llm'", specifier = ">=1.86.0rc1" }, { name = "markitdown" }, + { name = "moviepy", marker = "extra == 'test'", specifier = "<2" }, { name = "nemo-retriever", extras = ["benchmarks", "llm", "local", "multimedia", "nemotron-parse", "service", "tabular"], marker = "extra == 'all'" }, { name = "nemotron-graphic-elements-v1", marker = "extra == 'local'", specifier = ">=0.dev0", index = "https://test.pypi.org/simple/" }, { name = "nemotron-ocr", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'local') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'local')", specifier = ">=2.0.0.dev0", index = "https://test.pypi.org/simple/" }, @@ -2620,7 +2725,7 @@ requires-dist = [ { name = "nvidia-riva-client", specifier = ">=2.25.1" }, { name = "open-clip-torch", marker = "extra == 'benchmarks'", specifier = "==3.2.0" }, { name = "open-clip-torch", marker = "extra == 'nemotron-parse'", specifier = "==3.2.0" }, - { name = "opencv-python-headless", marker = "extra == 'local'", specifier = ">=4.8.0" }, + { name = "opencv-python-headless", specifier = ">=4.8.0" }, { name = "pandas", specifier = ">=2.0,<3" }, { name = "pillow", specifier = "==12.2.0" }, { name = "prometheus-fastapi-instrumentator", specifier = ">=7.0,<8" }, @@ -2629,15 +2734,16 @@ requires-dist = [ { name = "pydantic", specifier = ">=2.8.0" }, { name = "pypdfium2", specifier = "==4.30.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.2" }, + { name = "pytest", marker = "extra == 'test'", specifier = ">=8.0.2" }, + { name = "python-dateutil", specifier = ">=2.9.0" }, { name = "python-multipart", specifier = ">=0.0.9" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "ray", extras = ["data", "serve"], specifier = ">=2.49.0" }, { name = "requests", specifier = ">=2.32.5" }, { name = "rich", specifier = ">=13.7.0" }, { name = "s3fs", specifier = ">=2025.5.1" }, - { name = "scikit-learn", marker = "extra == 'local'", specifier = ">=1.6.0" }, - { name = "scikit-learn", marker = "extra == 'service'", specifier = ">=1.6.0" }, - { name = "scipy", marker = "extra == 'multimedia'", specifier = ">=1.11.0" }, + { name = "scikit-learn", specifier = ">=1.6.0" }, + { name = "scipy", specifier = ">=1.11.0" }, { name = "soundfile", marker = "extra == 'multimedia'", specifier = ">=0.12.0" }, { name = "sqlglot", specifier = ">=30.0.0" }, { name = "timm", marker = "extra == 'local'", specifier = "==1.0.22" }, @@ -2653,11 +2759,12 @@ requires-dist = [ { name = "tritonclient", marker = "extra == 'local'" }, { name = "typer", specifier = ">=0.12.0" }, { name = "universal-pathlib", specifier = ">=0.2.0" }, + { name = "unstructured-client" }, { name = "urllib3", specifier = ">=2.7.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.30.0" }, { name = "vllm", marker = "sys_platform == 'linux' and extra == 'local'", specifier = "==0.20.0" }, ] -provides-extras = ["service", "local", "multimedia", "nemotron-parse", "tabular", "benchmarks", "llm", "dev", "all"] +provides-extras = ["service", "local", "multimedia", "nemotron-parse", "tabular", "benchmarks", "llm", "dev", "test", "all"] [[package]] name = "nemotron-graphic-elements-v1" @@ -3553,6 +3660,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/2d/d4bf65e47cea8ff2c794a600c4fd1273a7902f268757c531e0ee9f18aa58/pooch-1.9.0-py3-none-any.whl", hash = "sha256:f265597baa9f760d25ceb29d0beb8186c243d6607b0f60b83ecf14078dbc703b", size = 67175, upload-time = "2026-01-30T19:15:08.36Z" }, ] +[[package]] +name = "proglog" +version = "0.1.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/af/c108866c452eda1132f3d6b3cb6be2ae8430c97e9309f38ca9dbd430af37/proglog-0.1.12.tar.gz", hash = "sha256:361ee074721c277b89b75c061336cb8c5f287c92b043efa562ccf7866cda931c", size = 8794, upload-time = "2025-05-09T14:36:18.316Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/1b/f7ea6cde25621cd9236541c66ff018f4268012a534ec31032bcb187dc5e7/proglog-0.1.12-py3-none-any.whl", hash = "sha256:ccaafce51e80a81c65dc907a460c07ccb8ec1f78dc660cfd8f9ec3a22f01b84c", size = 6337, upload-time = "2025-05-09T14:36:16.798Z" }, +] + [[package]] name = "prometheus-client" version = "0.25.0" @@ -3901,6 +4020,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pypdf" +version = "6.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/6d/20879428577c1e57ecd41b69dc86beabf43db9287ad2e702207f8b48c751/pypdf-6.12.2.tar.gz", hash = "sha256:111669eb6680c04495ae0c113a1476e3bf93a95761d23c7406b591c80a6490b1", size = 6468184, upload-time = "2026-05-26T13:31:26.911Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/44/fee070a16639d9869bb6a7e0f3a1b3946da1d66f32b9260b4d19cb90d7b2/pypdf-6.12.2-py3-none-any.whl", hash = "sha256:67b2699357a1f3f4c945940ea80826349ee507c9e2577724a14b4941982c104d", size = 343865, upload-time = "2026-05-26T13:31:25.068Z" }, +] + [[package]] name = "pypdfium2" version = "4.30.0" @@ -5077,6 +5205,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dd/1a/5d9a402b39ec892d856bbdd9db502ff73ce28cdf4aff72eb1ce1d6843506/universal_pathlib-0.3.10-py3-none-any.whl", hash = "sha256:dfaf2fb35683d2eb1287a3ed7b215e4d6016aa6eaf339c607023d22f90821c66", size = 83528, upload-time = "2026-02-22T14:40:57.316Z" }, ] +[[package]] +name = "unstructured-client" +version = "0.42.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiofiles" }, + { name = "cryptography" }, + { name = "httpcore" }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "pypdf" }, + { name = "requests-toolbelt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/67/6afb5337e97566a9dc0337606223893ce01f175bd17bf05844a816581b69/unstructured_client-0.42.8.tar.gz", hash = "sha256:663655548ed5c205efb48b7f38ca0906998b33571512f7c53c60aa811e514464", size = 94400, upload-time = "2026-01-14T21:54:03.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/18/d792b297937459ef54e3972b08ce3b5bdd4018d053837a8cfb3c40dd1c49/unstructured_client-0.42.8-py3-none-any.whl", hash = "sha256:6dbdb62d36554a5cbe61dc1b6ef0c8b11a46cc61e2602c2dc22975ba78028214", size = 219970, upload-time = "2026-01-14T21:54:01.206Z" }, +] + [[package]] name = "urllib3" version = "2.7.0"