From bd53bffb30c2df305f3569e96cf0b98ed59f94b3 Mon Sep 17 00:00:00 2001 From: Sanjay Date: Tue, 26 May 2026 22:19:53 +0530 Subject: [PATCH 1/3] Add ed YouTube Transcript RAG pipeline using Haystack. --- .../youtube_transcript_rag_haystack.ipynb | 2001 +++++++++++++++++ 1 file changed, 2001 insertions(+) create mode 100644 notebooks/youtube_transcript_rag_haystack.ipynb diff --git a/notebooks/youtube_transcript_rag_haystack.ipynb b/notebooks/youtube_transcript_rag_haystack.ipynb new file mode 100644 index 0000000..066d4fe --- /dev/null +++ b/notebooks/youtube_transcript_rag_haystack.ipynb @@ -0,0 +1,2001 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyOxBySgJruHsXHgPCaSJJ10", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "8e325ba74ae1449a90a8c81ef579d58d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5ab99ab2641a40a19dfb3bf7741ab4e3", + "IPY_MODEL_dd824384d6c04a78ba1c082087141ae5", + "IPY_MODEL_44e9c2d220d8417399b03a9f1eb43b76" + ], + "layout": "IPY_MODEL_9bad42ab1b7a4fff9f1b691bfb057a27" + } + }, + "5ab99ab2641a40a19dfb3bf7741ab4e3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ed208d9a57854b7cb85d7c4d571ecd47", + "placeholder": "​", + "style": "IPY_MODEL_0adb4596bb6c4257b2cb95ce044b6a88", + "value": "Batches: 100%" + } + }, + "dd824384d6c04a78ba1c082087141ae5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_70b22477297f40cd9622dbe8b27d03b8", + "max": 6, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_75547facb50944e3b269bc41a914165c", + "value": 6 + } + }, + "44e9c2d220d8417399b03a9f1eb43b76": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5f07bfdb9f8e43ba8fb4bedb4f678738", + "placeholder": "​", + "style": "IPY_MODEL_2484c6b8635a4a379f664628b8902699", + "value": " 6/6 [02:01<00:00, 18.97s/it]" + } + }, + "9bad42ab1b7a4fff9f1b691bfb057a27": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ed208d9a57854b7cb85d7c4d571ecd47": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": 
null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0adb4596bb6c4257b2cb95ce044b6a88": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "70b22477297f40cd9622dbe8b27d03b8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "75547facb50944e3b269bc41a914165c": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5f07bfdb9f8e43ba8fb4bedb4f678738": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2484c6b8635a4a379f664628b8902699": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "74e4d78638c949d6a1d10dbe80667e49": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_105a5c248c8a4d78af91d09fe46e5173", + "IPY_MODEL_4a5cf3ebdc284ca5920906c24b25931a", + "IPY_MODEL_4874cb47752a4ef7846b902157411ecd" + ], + "layout": "IPY_MODEL_c18ade9c08034f209b7433b2bebf0a63" + } + }, + "105a5c248c8a4d78af91d09fe46e5173": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_295cdf0d7f944b76a5b1582286f37ff4", + "placeholder": "​", + "style": "IPY_MODEL_7dd0607bbdf24422b313c0b37b6c001f", + "value": "Batches: 100%" + } + }, + "4a5cf3ebdc284ca5920906c24b25931a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_28ac4db982c642be95ad8f883972d584", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_47900641a10042efbdc43196d5598406", + "value": 1 + } + }, + "4874cb47752a4ef7846b902157411ecd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d3d5553be2f1476cae6051cf83aebc09", + "placeholder": "​", + "style": "IPY_MODEL_2b1c168bdb5d4fcca5a210d8ee915013", + "value": " 1/1 [00:00<00:00,  6.42it/s]" + } + }, + "c18ade9c08034f209b7433b2bebf0a63": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + 
"overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "295cdf0d7f944b76a5b1582286f37ff4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7dd0607bbdf24422b313c0b37b6c001f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "28ac4db982c642be95ad8f883972d584": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", 
+ "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "47900641a10042efbdc43196d5598406": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d3d5553be2f1476cae6051cf83aebc09": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2b1c168bdb5d4fcca5a210d8ee915013": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "76ee4be23ee04bf19c4aae5f7811fa1f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_bfee72a01c3646b89a663e40c2c3cde6", + "IPY_MODEL_23727974070c4198aaa27da2516921dd", + "IPY_MODEL_bc7317368eea4ab4895421c02b536e8e" + ], + "layout": "IPY_MODEL_6a8098eda41948fcabca8762c6c59f3f" + } + }, + 
"bfee72a01c3646b89a663e40c2c3cde6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48f404ecd2304bcca704e9c277b3495c", + "placeholder": "​", + "style": "IPY_MODEL_5b88a70c72b94ce7a31df79317abb845", + "value": "Batches: 100%" + } + }, + "23727974070c4198aaa27da2516921dd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9cd8c9788b8a4e27be205c16335c46ed", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4c944102b2e847ecaa5cd4930f865ee6", + "value": 1 + } + }, + "bc7317368eea4ab4895421c02b536e8e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_51dfc644ec964da0a855d89a614e12ae", + "placeholder": "​", + "style": 
"IPY_MODEL_b49261b0f4d740b8b62e9697aa778118", + "value": " 1/1 [00:00<00:00,  7.44it/s]" + } + }, + "6a8098eda41948fcabca8762c6c59f3f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48f404ecd2304bcca704e9c277b3495c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5b88a70c72b94ce7a31df79317abb845": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9cd8c9788b8a4e27be205c16335c46ed": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c944102b2e847ecaa5cd4930f865ee6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "51dfc644ec964da0a855d89a614e12ae": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "b49261b0f4d740b8b62e9697aa778118": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a147a70806a947de979278b6fb898ac4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fa7335b6d248475090b7e0d0fb6532d5", + "IPY_MODEL_5b62735531144391960241a96c102db1", + "IPY_MODEL_11e188b99710499c8f33bbf51e8e972d" + ], + "layout": "IPY_MODEL_063a8b6111fd462995f0750da42bba92" + } + }, + "fa7335b6d248475090b7e0d0fb6532d5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ade60a9ecc184023ab7b36a1d2806ba8", + "placeholder": "​", + "style": "IPY_MODEL_aff5df7d07b647eb81285357a7c8ad60", + "value": "Batches: 100%" + } + }, + "5b62735531144391960241a96c102db1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e8b72ddec6864688908af034c6debcfe", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_348961e5157b4a04841a83dd3ce960f1", + "value": 1 + } + }, + "11e188b99710499c8f33bbf51e8e972d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_31e3b6747cf443ce97ef8abff3f43092", + "placeholder": "​", + "style": "IPY_MODEL_5c09ed607980444fbcefe92d52f91517", + "value": " 1/1 [00:00<00:00,  8.20it/s]" + } + }, + "063a8b6111fd462995f0750da42bba92": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": 
null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ade60a9ecc184023ab7b36a1d2806ba8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aff5df7d07b647eb81285357a7c8ad60": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e8b72ddec6864688908af034c6debcfe": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "348961e5157b4a04841a83dd3ce960f1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"31e3b6747cf443ce97ef8abff3f43092": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5c09ed607980444fbcefe92d52f91517": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# YouTube Transcript RAG with Haystack and HuggingFace\n", + "\n", + "> This 
cookbook shows how to build a RAG pipeline over any YouTube video\n", + "> transcript using [Haystack](https://github.com/deepset-ai/haystack).\n", + "\n", + "## Overview\n", + "\n", + "## What this cookbook does\n", + "\n", + "- [Fetches the transcript of any public YouTube video automatically](#step-1-fetch-youtube-transcript)\n", + "- [Splits it into searchable chunks](#step-2-split-and-index-transcript-using-haystack)\n", + "- [Embeds chunks using BAAI/bge-base-en-v1.5](#step-2-split-and-index-transcript-using-haystack)\n", + "- [Retrieves the most relevant chunks using semantic search](#step-3-build-rag-query-pipeline-using-haystack)\n", + "- [Generates accurate answers using Qwen2.5 via HuggingFace Inference API (free)](#step-3-build-rag-query-pipeline-using-haystack)\n", + "\n", + "## Components used\n", + "- [YouTubeTranscriptApi](https://github.com/jdepoix/youtube-transcript-api): to fetch video transcripts\n", + "- [DocumentSplitter](https://docs.haystack.deepset.ai/docs/documentsplitter): to split transcript into overlapping chunks\n", + "- [SentenceTransformersDocumentEmbedder](https://docs.haystack.deepset.ai/docs/sentencetransformersdocumentembedder): to embed chunks using `BAAI/bge-base-en-v1.5`\n", + "- [InMemoryDocumentStore](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore): to store and search embedded chunks\n", + "- [InMemoryEmbeddingRetriever](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever): to retrieve relevant chunks for a query\n", + "- [InferenceClient](https://huggingface.co/docs/huggingface_hub/guides/inference): to generate answers using `Qwen2.5-7B-Instruct`\n", + "\n" + ], + "metadata": { + "id": "bjn02OYcPdGj" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Install the dependencies" + ], + "metadata": { + "id": "dw24tO4oSIDQ" + } + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": true, + "id": "Ggq2FdGNHh-h" + }, + "outputs": [], + "source": [ + "!pip 
install haystack-ai youtube-transcript-api sentence-transformers -q" + ] + }, + { + "cell_type": "code", + "source": [ + "from haystack import Document, Pipeline\n", + "from haystack.document_stores.in_memory import InMemoryDocumentStore\n", + "from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder\n", + "from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever\n", + "from haystack.components.builders import ChatPromptBuilder\n", + "from haystack.components.generators.chat import HuggingFaceAPIChatGenerator\n", + "from haystack.components.preprocessors import DocumentSplitter\n", + "from haystack.dataclasses import ChatMessage\n", + "from huggingface_hub import InferenceClient\n", + "from youtube_transcript_api import YouTubeTranscriptApi\n", + "import os" + ], + "metadata": { + "id": "TewyL8tbHnTw" + }, + "execution_count": 37, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Setting up your HuggingFace API Token\n", + "\n", + "This cookbook uses HuggingFace's free Inference API to generate answers.\n", + "You'll need a free HuggingFace account and API token to proceed.\n", + "\n", + "### Step 1 — Create a free HuggingFace account\n", + "Go to [huggingface.co/join](https://huggingface.co/join) and sign up for free.\n", + "\n", + "### Step 2 — Generate an API token\n", + "1. Go to [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)\n", + "2. Click **\"New token\"**\n", + "3. Give it any name (e.g. `haystack-notebook`)\n", + "4. Set the role to **\"Read\"**\n", + "5. Click **\"Generate a token\"** and copy it\n", + "\n", + "### Step 3 — Add token to Colab Secrets\n", + "1. Click the **key icon** in the left sidebar of Colab\n", + "2. Click **\"Add new secret\"**\n", + "3. Set the name to exactly `HF_TOKEN`\n", + "4. Paste your token as the value\n", + "5. 
Toggle the **notebook access** switch to ON\n" + ], + "metadata": { + "id": "mjO6vV7g_cq_" + } + }, + { + "cell_type": "code", + "source": [ + "from google.colab import userdata\n", + "import os\n", + "\n", + "os.environ[\"HF_API_TOKEN\"] = userdata.get('HF_TOKEN')" + ], + "metadata": { + "id": "7OtDfuZ1Hz7a" + }, + "execution_count": 38, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 1 — Fetch YouTube Transcript\n", + "\n", + "\n", + "\n", + "We use `youtube-transcript-api` to fetch the full transcript of any public\n", + "YouTube video. The transcript is joined into a single string and wrapped\n", + "into a Haystack [`Document`](https://docs.haystack.deepset.ai/docs/data-classes#document)\n", + "object — the core data structure Haystack uses to represent text content\n", + "throughout the entire pipeline.\n", + "\n", + "> A `Document` in Haystack can hold text content, metadata, and embeddings\n", + "> all in one place — making it easy to pass data between pipeline components." 
+ ], + "metadata": { + "id": "mzTvxtxeQCv0" + } + }, + { + "cell_type": "code", + "source": [ + "def get_transcript(video_url):\n", + "\n", + " # Extract video ID from URL\n", + " video_id=video_url.split(\"v=\")[-1].split(\"&\")[0]\n", + "\n", + " # Fetch transcript using new API\n", + " ytt_api=YouTubeTranscriptApi()\n", + " transcript=ytt_api.fetch(video_id)\n", + "\n", + " # Join all text chunks into one document\n", + " full_text=\" \".join([entry.text for entry in transcript])\n", + "\n", + " return full_text\n", + "\n", + "# Enter any YouTube URL here\n", + "video_url=input(\"Enter YouTube URL: \")\n", + "transcript_text=get_transcript(video_url)\n", + "\n", + "print(f\"Transcript fetched successfully!\")\n", + "print(f\"Total characters: {len(transcript_text)}\")\n", + "print(f\"\\nPreview:\\n{transcript_text[:300]}...\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "75G-QJasIhYI", + "outputId": "230827ea-591e-402b-c607-58ff0d93cb63" + }, + "execution_count": 39, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Enter YouTube URL: https://www.youtube.com/watch?v=VMj-3S1tku0&t=1263s\n", + "Transcript fetched successfully!\n", + "Total characters: 123212\n", + "\n", + "Preview:\n", + "hello my name is andre and i've been training deep neural networks for a bit more than a decade and in this lecture i'd like to show you what neural network training looks like under the hood so in particular we are going to start with a blank jupiter notebook and by the end of this lecture we will ...\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 2 — Split and Index Transcript using Haystack\n", + "\n", + "This step uses three Haystack components to prepare the transcript for retrieval.\n", + "\n", + "### DocumentSplitter\n", + "[`DocumentSplitter`](https://docs.haystack.deepset.ai/docs/documentsplitter)\n", + "breaks the transcript `Document` into smaller chunks of **150 
words** with\n", + "**20 word overlap**.\n", + "\n", + "Haystack's `DocumentSplitter` supports splitting by `word`, `sentence`, or\n", + "`passage` — we use `word` here for consistent chunk sizes. The overlap ensures\n", + "context is preserved at chunk boundaries.\n", + "\n", + "### SentenceTransformersDocumentEmbedder\n", + "[`SentenceTransformersDocumentEmbedder`](https://docs.haystack.deepset.ai/docs/sentencetransformersdocumentembedder)\n", + "converts each chunk into a dense vector embedding using `BAAI/bge-base-en-v1.5`\n", + "— a top-ranked model on the [MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard).\n", + "\n", + "Haystack's embedder automatically adds the embedding to each `Document` object's\n", + "`embedding` field, keeping text and its vector representation together.\n", + "\n", + "### InMemoryDocumentStore\n", + "[`InMemoryDocumentStore`](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore)\n", + "stores all embedded `Document` objects in memory for fast similarity search.\n", + "\n", + "`InMemoryDocumentStore` is ideal for lightweight single-video demos like this one.\n", + "\n", + "For larger-scale use cases with multiple videos, Haystack supports many production-ready document stores — including\n", + "[Qdrant](https://haystack.deepset.ai/integrations/qdrant),\n", + "[Weaviate](https://haystack.deepset.ai/integrations/weaviate), and\n", + "[Elasticsearch](https://haystack.deepset.ai/integrations/elasticsearch).\n", + "\n" + ], + "metadata": { + "id": "VyljdgLkQSgB" + } + }, + { + "cell_type": "code", + "source": [ + "# Initialize document store\n", + "document_store=InMemoryDocumentStore()\n", + "\n", + "# Convert to Haystack Document\n", + "docs=[Document(content=transcript_text)]\n", + "\n", + "# Split into chunks\n", + "splitter=DocumentSplitter(\n", + " split_by=\"word\",\n", + " split_length=150,\n", + " split_overlap=20\n", + ")\n", + "\n", + "split_docs=splitter.run(documents=docs)[\"documents\"]\n", + 
"\n", + "# Embed chunks\n", + "embedder=SentenceTransformersDocumentEmbedder(\n", + " model=\"BAAI/bge-base-en-v1.5\"\n", + ")\n", + "embedder.warm_up()\n", + "embedded_docs=embedder.run(documents=split_docs)[\"documents\"]\n", + "\n", + "# Write to store\n", + "document_store.write_documents(embedded_docs)\n", + "\n", + "# Verify documents are stored correctly\n", + "print(f\" {document_store.count_documents()} chunks indexed successfully!\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "8e325ba74ae1449a90a8c81ef579d58d", + "5ab99ab2641a40a19dfb3bf7741ab4e3", + "dd824384d6c04a78ba1c082087141ae5", + "44e9c2d220d8417399b03a9f1eb43b76", + "9bad42ab1b7a4fff9f1b691bfb057a27", + "ed208d9a57854b7cb85d7c4d571ecd47", + "0adb4596bb6c4257b2cb95ce044b6a88", + "70b22477297f40cd9622dbe8b27d03b8", + "75547facb50944e3b269bc41a914165c", + "5f07bfdb9f8e43ba8fb4bedb4f678738", + "2484c6b8635a4a379f664628b8902699" + ] + }, + "id": "YxHrQXgeIpHb", + "outputId": "1f3c2d0d-93aa-4600-966c-3306dd7e1a07" + }, + "execution_count": 40, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Batches: 0%| | 0/6 [00:00 It is critical to use the **same model** for both document and query\n", + "> embedding — different models produce vectors in different spaces, making\n", + "> similarity search meaningless.\n", + "\n", + "### 2. Retrieve relevant chunks\n", + "[`InMemoryEmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever)\n", + "finds the **top 5 most similar chunks** using cosine similarity between the\n", + "question embedding and all stored chunk embeddings.\n", + "\n", + "This is semantic search — it understands *meaning*, not just keyword matches.\n", + "\n", + "### 3. 
Generate the answer\n", + "The 5 retrieved chunks are passed as context to `Qwen2.5-7B-Instruct` via\n", + "HuggingFace's free\n", + "[Inference API](https://huggingface.co/docs/api-inference/index).\n", + "\n", + "The model is explicitly instructed to answer **only** based on the provided\n", + "transcript context — this helps reduce hallucinations and keeps answers grounded\n", + "in the video content." + ], + "metadata": { + "id": "BIgYgRcH3yIN" + } + }, + { + "cell_type": "code", + "source": [ + "# Initialize components\n", + "text_embedder=SentenceTransformersTextEmbedder(\n", + " model=\"BAAI/bge-base-en-v1.5\"\n", + ")\n", + "text_embedder.warm_up()\n", + "\n", + "retriever=InMemoryEmbeddingRetriever(\n", + " document_store=document_store,\n", + " top_k=5\n", + ")\n", + "\n", + "client=InferenceClient(\n", + " model=\"Qwen/Qwen2.5-7B-Instruct\",\n", + " token=os.environ[\"HF_API_TOKEN\"]\n", + ")\n", + "\n", + "def ask(question):\n", + "\n", + " # Embed question\n", + " embedded_question=text_embedder.run(text=question)\n", + "\n", + " # Retrieve relevant chunks\n", + " retrieved=retriever.run(\n", + " query_embedding=embedded_question[\"embedding\"]\n", + " )\n", + "\n", + " # Build context\n", + " context=\"\\n\".join([doc.content for doc in retrieved[\"documents\"]])\n", + "\n", + " # Build prompt\n", + " prompt = f\"\"\"You are a helpful assistant that answers questions\n", + " based on YouTube video transcripts.\n", + "\n", + "Context:{context}\n", + "\n", + "Question:{question}\n", + "\n", + "Answer based only on the transcript content above.\n", + "\"\"\"\n", + " # Generate answer\n", + " result=client.chat_completion(\n", + " messages=[{\"role\": \"user\", \"content\": prompt}],\n", + " max_tokens=300\n", + " )\n", + "\n", + " print(f\"Question: {question}\")\n", + " print(f\"\\nAnswer: {result.choices[0].message.content}\")\n", + " print(\"\\n\" + \"─\"*60 + \"\\n\")\n", + "\n", + "print(\"Pipeline ready!\")" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "QZXG_t0jNflk", + "outputId": "4b194440-54ba-4630-aa39-7c8bc3947942" + }, + "execution_count": 41, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Pipeline ready!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 4 — Ask Questions\n", + "\n", + "The pipeline is ready! Call `ask()` with any question about the video you indexed in Steps 1–2.\n", + "\n", + "> **Tips for best results:**\n", + "> - Ask specific questions rather than vague ones\n", + "> - Conceptual questions like *\"How does X work?\"* perform better than\n", + "> opinion-based ones\n", + "> - The answer quality depends on how clearly the topic is covered in the video\n", + "\n", + "To query a different video, re-run Steps 1–3 with a new YouTube URL, then ask your own questions below!" + ], + "metadata": { + "id": "gL6iDKKKQueP" + } + }, + { + "cell_type": "code", + "source": [ + "# Try your own YouTube URL and questions!\n", + "ask(\"What is the main topic of this video?\")\n", + "ask(\"What are the key concepts explained?\")\n", + "ask(\"What prerequisites does the speaker recommend?\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 654, + "referenced_widgets": [ + "74e4d78638c949d6a1d10dbe80667e49", + "105a5c248c8a4d78af91d09fe46e5173", + "4a5cf3ebdc284ca5920906c24b25931a", + "4874cb47752a4ef7846b902157411ecd", + "c18ade9c08034f209b7433b2bebf0a63", + "295cdf0d7f944b76a5b1582286f37ff4", + "7dd0607bbdf24422b313c0b37b6c001f", + "28ac4db982c642be95ad8f883972d584", + "47900641a10042efbdc43196d5598406", + "d3d5553be2f1476cae6051cf83aebc09", + "2b1c168bdb5d4fcca5a210d8ee915013", + "76ee4be23ee04bf19c4aae5f7811fa1f", + "bfee72a01c3646b89a663e40c2c3cde6", + "23727974070c4198aaa27da2516921dd", + "bc7317368eea4ab4895421c02b536e8e", + "6a8098eda41948fcabca8762c6c59f3f", + "48f404ecd2304bcca704e9c277b3495c", + "5b88a70c72b94ce7a31df79317abb845", + "9cd8c9788b8a4e27be205c16335c46ed", + 
"4c944102b2e847ecaa5cd4930f865ee6", + "51dfc644ec964da0a855d89a614e12ae", + "b49261b0f4d740b8b62e9697aa778118", + "a147a70806a947de979278b6fb898ac4", + "fa7335b6d248475090b7e0d0fb6532d5", + "5b62735531144391960241a96c102db1", + "11e188b99710499c8f33bbf51e8e972d", + "063a8b6111fd462995f0750da42bba92", + "ade60a9ecc184023ab7b36a1d2806ba8", + "aff5df7d07b647eb81285357a7c8ad60", + "e8b72ddec6864688908af034c6debcfe", + "348961e5157b4a04841a83dd3ce960f1", + "31e3b6747cf443ce97ef8abff3f43092", + "5c09ed607980444fbcefe92d52f91517" + ] + }, + "id": "XYbAa-rVNzrC", + "outputId": "287dafdb-3b23-4693-dcee-489408161b9e" + }, + "execution_count": 42, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Batches: 0%| | 0/1 [00:00 Date: Tue, 26 May 2026 22:21:32 +0530 Subject: [PATCH 2/3] Added YouTube Transcript RAG pipeline using Haystack. --- .../youtube_transcript_rag_haystack.ipynb | 1604 +---------------- 1 file changed, 16 insertions(+), 1588 deletions(-) diff --git a/notebooks/youtube_transcript_rag_haystack.ipynb b/notebooks/youtube_transcript_rag_haystack.ipynb index 066d4fe..d2c90f9 100644 --- a/notebooks/youtube_transcript_rag_haystack.ipynb +++ b/notebooks/youtube_transcript_rag_haystack.ipynb @@ -13,1378 +13,6 @@ }, "language_info": { "name": "python" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "8e325ba74ae1449a90a8c81ef579d58d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_5ab99ab2641a40a19dfb3bf7741ab4e3", - "IPY_MODEL_dd824384d6c04a78ba1c082087141ae5", - "IPY_MODEL_44e9c2d220d8417399b03a9f1eb43b76" - ], - "layout": 
"IPY_MODEL_9bad42ab1b7a4fff9f1b691bfb057a27" - } - }, - "5ab99ab2641a40a19dfb3bf7741ab4e3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ed208d9a57854b7cb85d7c4d571ecd47", - "placeholder": "​", - "style": "IPY_MODEL_0adb4596bb6c4257b2cb95ce044b6a88", - "value": "Batches: 100%" - } - }, - "dd824384d6c04a78ba1c082087141ae5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_70b22477297f40cd9622dbe8b27d03b8", - "max": 6, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_75547facb50944e3b269bc41a914165c", - "value": 6 - } - }, - "44e9c2d220d8417399b03a9f1eb43b76": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5f07bfdb9f8e43ba8fb4bedb4f678738", - "placeholder": 
"​", - "style": "IPY_MODEL_2484c6b8635a4a379f664628b8902699", - "value": " 6/6 [02:01<00:00, 18.97s/it]" - } - }, - "9bad42ab1b7a4fff9f1b691bfb057a27": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ed208d9a57854b7cb85d7c4d571ecd47": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": 
null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0adb4596bb6c4257b2cb95ce044b6a88": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "70b22477297f40cd9622dbe8b27d03b8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": 
null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "75547facb50944e3b269bc41a914165c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5f07bfdb9f8e43ba8fb4bedb4f678738": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, 
- "visibility": null, - "width": null - } - }, - "2484c6b8635a4a379f664628b8902699": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "74e4d78638c949d6a1d10dbe80667e49": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_105a5c248c8a4d78af91d09fe46e5173", - "IPY_MODEL_4a5cf3ebdc284ca5920906c24b25931a", - "IPY_MODEL_4874cb47752a4ef7846b902157411ecd" - ], - "layout": "IPY_MODEL_c18ade9c08034f209b7433b2bebf0a63" - } - }, - "105a5c248c8a4d78af91d09fe46e5173": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_295cdf0d7f944b76a5b1582286f37ff4", - "placeholder": "​", - "style": "IPY_MODEL_7dd0607bbdf24422b313c0b37b6c001f", - "value": "Batches: 100%" - } - }, - "4a5cf3ebdc284ca5920906c24b25931a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - 
"model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_28ac4db982c642be95ad8f883972d584", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_47900641a10042efbdc43196d5598406", - "value": 1 - } - }, - "4874cb47752a4ef7846b902157411ecd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d3d5553be2f1476cae6051cf83aebc09", - "placeholder": "​", - "style": "IPY_MODEL_2b1c168bdb5d4fcca5a210d8ee915013", - "value": " 1/1 [00:00<00:00,  6.42it/s]" - } - }, - "c18ade9c08034f209b7433b2bebf0a63": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": 
null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "295cdf0d7f944b76a5b1582286f37ff4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7dd0607bbdf24422b313c0b37b6c001f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - 
"_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "28ac4db982c642be95ad8f883972d584": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "47900641a10042efbdc43196d5598406": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - 
"d3d5553be2f1476cae6051cf83aebc09": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2b1c168bdb5d4fcca5a210d8ee915013": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "76ee4be23ee04bf19c4aae5f7811fa1f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_bfee72a01c3646b89a663e40c2c3cde6", - "IPY_MODEL_23727974070c4198aaa27da2516921dd", - "IPY_MODEL_bc7317368eea4ab4895421c02b536e8e" - ], - "layout": "IPY_MODEL_6a8098eda41948fcabca8762c6c59f3f" - } - }, - "bfee72a01c3646b89a663e40c2c3cde6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_48f404ecd2304bcca704e9c277b3495c", - "placeholder": "​", - "style": "IPY_MODEL_5b88a70c72b94ce7a31df79317abb845", - "value": "Batches: 100%" - } - }, - "23727974070c4198aaa27da2516921dd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9cd8c9788b8a4e27be205c16335c46ed", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4c944102b2e847ecaa5cd4930f865ee6", - "value": 1 - } - }, - "bc7317368eea4ab4895421c02b536e8e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_51dfc644ec964da0a855d89a614e12ae", - "placeholder": "​", - "style": "IPY_MODEL_b49261b0f4d740b8b62e9697aa778118", - "value": " 1/1 [00:00<00:00,  7.44it/s]" - } - }, - "6a8098eda41948fcabca8762c6c59f3f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "48f404ecd2304bcca704e9c277b3495c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5b88a70c72b94ce7a31df79317abb845": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "9cd8c9788b8a4e27be205c16335c46ed": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": 
null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4c944102b2e847ecaa5cd4930f865ee6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "51dfc644ec964da0a855d89a614e12ae": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - 
"height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b49261b0f4d740b8b62e9697aa778118": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a147a70806a947de979278b6fb898ac4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fa7335b6d248475090b7e0d0fb6532d5", - "IPY_MODEL_5b62735531144391960241a96c102db1", - "IPY_MODEL_11e188b99710499c8f33bbf51e8e972d" - ], - "layout": "IPY_MODEL_063a8b6111fd462995f0750da42bba92" - } - }, - "fa7335b6d248475090b7e0d0fb6532d5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ade60a9ecc184023ab7b36a1d2806ba8", - "placeholder": "​", - "style": "IPY_MODEL_aff5df7d07b647eb81285357a7c8ad60", - "value": "Batches: 100%" - } - }, - "5b62735531144391960241a96c102db1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e8b72ddec6864688908af034c6debcfe", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_348961e5157b4a04841a83dd3ce960f1", - "value": 1 - } - }, - "11e188b99710499c8f33bbf51e8e972d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_31e3b6747cf443ce97ef8abff3f43092", - "placeholder": "​", - "style": "IPY_MODEL_5c09ed607980444fbcefe92d52f91517", - "value": " 1/1 [00:00<00:00,  8.20it/s]" - } - }, - "063a8b6111fd462995f0750da42bba92": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ade60a9ecc184023ab7b36a1d2806ba8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - 
"overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "aff5df7d07b647eb81285357a7c8ad60": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e8b72ddec6864688908af034c6debcfe": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "348961e5157b4a04841a83dd3ce960f1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": 
{ - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "31e3b6747cf443ce97ef8abff3f43092": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5c09ed607980444fbcefe92d52f91517": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - 
"description_width": "" - } - } - } } }, "cells": [ @@ -1440,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": { "collapsed": true, "id": "Ggq2FdGNHh-h" @@ -1468,7 +96,7 @@ "metadata": { "id": "TewyL8tbHnTw" }, - "execution_count": 37, + "execution_count": null, "outputs": [] }, { @@ -1511,7 +139,7 @@ "metadata": { "id": "7OtDfuZ1Hz7a" }, - "execution_count": 38, + "execution_count": null, "outputs": [] }, { @@ -1560,27 +188,10 @@ "print(f\"\\nPreview:\\n{transcript_text[:300]}...\")" ], "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "75G-QJasIhYI", - "outputId": "230827ea-591e-402b-c607-58ff0d93cb63" + "id": "75G-QJasIhYI" }, - "execution_count": 39, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Enter YouTube URL: https://www.youtube.com/watch?v=VMj-3S1tku0&t=1263s\n", - "Transcript fetched successfully!\n", - "Total characters: 123212\n", - "\n", - "Preview:\n", - "hello my name is andre and i've been training deep neural networks for a bit more than a decade and in this lecture i'd like to show you what neural network training looks like under the hood so in particular we are going to start with a blank jupiter notebook and by the end of this lecture we will ...\n" - ] - } - ] + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", @@ -1654,50 +265,10 @@ "print(f\" {document_store.count_documents()} chunks indexed successfully!\")" ], "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 66, - "referenced_widgets": [ - "8e325ba74ae1449a90a8c81ef579d58d", - "5ab99ab2641a40a19dfb3bf7741ab4e3", - "dd824384d6c04a78ba1c082087141ae5", - "44e9c2d220d8417399b03a9f1eb43b76", - "9bad42ab1b7a4fff9f1b691bfb057a27", - "ed208d9a57854b7cb85d7c4d571ecd47", - "0adb4596bb6c4257b2cb95ce044b6a88", - "70b22477297f40cd9622dbe8b27d03b8", - "75547facb50944e3b269bc41a914165c", - "5f07bfdb9f8e43ba8fb4bedb4f678738", - 
"2484c6b8635a4a379f664628b8902699" - ] - }, - "id": "YxHrQXgeIpHb", - "outputId": "1f3c2d0d-93aa-4600-966c-3306dd7e1a07" + "id": "YxHrQXgeIpHb" }, - "execution_count": 40, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Batches: 0%| | 0/6 [00:00 Date: Tue, 26 May 2026 22:38:47 +0530 Subject: [PATCH 3/3] Add YouTube Transcript RAG Pipeline entry to index --- index.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/index.toml b/index.toml index 7eeaf19..dcc7712 100644 --- a/index.toml +++ b/index.toml @@ -370,3 +370,9 @@ title = "Tabular Data Processing with Prior Labs MCP" notebook = "prior_labs_agent.ipynb" new = true topics = ["Agents", "MCP", "Data Processing"] + +[[cookbook]] +title = "Build a YouTube Transcript RAG Pipeline with Haystack" +notebook = "youtube_transcript_rag.ipynb" +topics = ["RAG", "Question-Answering"] +new = true