diff --git a/pytorchconf-2024/category.json b/pytorchconf-2024/category.json
new file mode 100644
index 000000000..dfd3d3012
--- /dev/null
+++ b/pytorchconf-2024/category.json
@@ -0,0 +1,3 @@
+{
+  "title": "PyTorch Conference 2024"
+}
diff --git a/pytorchconf-2024/videos/a-distributed-stateful-dataloader-for-large-scale-pretraining-davis-wertheimer-linsong-chu.json b/pytorchconf-2024/videos/a-distributed-stateful-dataloader-for-large-scale-pretraining-davis-wertheimer-linsong-chu.json
new file mode 100644
index 000000000..2caaa1b76
--- /dev/null
+++ b/pytorchconf-2024/videos/a-distributed-stateful-dataloader-for-large-scale-pretraining-davis-wertheimer-linsong-chu.json
@@ -0,0 +1,25 @@
+{
+  "description": "A Distributed Stateful Dataloader for Large-Scale Pretraining - Davis Wertheimer, IBM & Linsong Chu, IBM Research\n\nLarge-scale model pretraining crucially relies on specialized and dedicated dataloaders that can, for example, partition and stream data asynchronously across multiple processes and physical nodes. In this talk we discuss one of the torch-native dataloaders we built and use at IBM Research for addressing these needs. Intended for use in large-scale model pretraining, particularly in research settings where rapid iteration between datasets may be required, our dataloader is distributed, stateful, checkpointable, composable and rescalable \u2013 while remaining a simple extension of the existing PyTorch dataloading framework. It automatically and invisibly handles data sharding, shuffling, subdataset weighting, checkpoint saving and loading, and custom user-defined preprocessing functions, with minimal overhead and high throughput. We discuss these properties and how we achieved them, such as reducing overhead by implementing a custom LCG random number generator, and demonstrate proof of concept on production-scale training of a 7B parameter Llama model over 4 trillion tokens.",
+  "duration": 1433,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Davis Wertheimer",
+    "Linsong Chu"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/VtT4rdph4Qs/maxresdefault.webp",
+  "title": "A Distributed Stateful Dataloader for Large-Scale Pretraining",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=VtT4rdph4Qs"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/blobs-to-clips-efficient-end-to-end-video-data-loading-andrew-ho-ahmad-sharif-meta.json b/pytorchconf-2024/videos/blobs-to-clips-efficient-end-to-end-video-data-loading-andrew-ho-ahmad-sharif-meta.json
new file mode 100644
index 000000000..9af20c723
--- /dev/null
+++ b/pytorchconf-2024/videos/blobs-to-clips-efficient-end-to-end-video-data-loading-andrew-ho-ahmad-sharif-meta.json
@@ -0,0 +1,25 @@
+{
+  "description": "Blobs to Clips: Efficient End-to-End Video Data Loading - Andrew Ho & Ahmad Sharif, Meta\n\nThe PyTorch team has improved training speed by an order of magnitude for teams at Meta working on Small-to-Large-Scale MultiModal Video models. In this talk we\u2019ll share our learnings on reducing GPU starvation by overcoming data loading challenges such as dealing with large distributed datasets, worker imbalance, compute bottlenecks due to parallel video decoding and sampling, checkpointing, and debuggability.\nAs part of our commitment to open-source, we are releasing a new decoding library and updating existing PyTorch libraries on GitHub, and invite feedback and contributions from the community.",
+  "duration": 1544,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Andrew Ho",
+    "Ahmad Sharif"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/-cBplHNM4RA/maxresdefault.webp",
+  "title": "Blobs to Clips: Efficient End-to-End Video Data Loading",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=-cBplHNM4RA"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/building-pytorch-computer-vision-algorithms-for-100-skin-shades-emmanuel-acheampong-robomua.json b/pytorchconf-2024/videos/building-pytorch-computer-vision-algorithms-for-100-skin-shades-emmanuel-acheampong-robomua.json
new file mode 100644
index 000000000..0db0bfc21
--- /dev/null
+++ b/pytorchconf-2024/videos/building-pytorch-computer-vision-algorithms-for-100-skin-shades-emmanuel-acheampong-robomua.json
@@ -0,0 +1,24 @@
+{
+  "description": "Building PyTorch Computer Vision Algorithms for 100 Skin Shades - Emmanuel Acheampong, roboMUA\n\nAt roboMUA we're leading the charge in building predictive AI models for diverse skin shades with the use of Convolutional Neural Networks (CNNs), and harnessing the power of Generative Adversarial Networks (GANs) specifically for generating realistic images of black hairstyles. Our session showcases PyTorch's versatility in both predictive and generative tasks, offering a comprehensive approach to inclusive AI. For predictive AI models, we leverage PyTorch's flexible framework to develop CNNs. Through innovative techniques in feature engineering and model architecture design, we demonstrate how PyTorch enables accurate prediction across 100 skin shades. Simultaneously, we showcase the transformative potential of GANs in the realm of black hairstyles. By training GANs on a curated dataset of diverse hair textures and styles, we illustrate how PyTorch facilitates the generation of lifelike images that celebrate the beauty and diversity of black hair.\nAttendees will gain insights into the data preprocessing, model training, and evaluation processes and learn how PyTorch empowers developers to build inclusive solutions.",
+  "duration": 882,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Emmanuel Acheampong"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/eGZ3-8ZbFj0/maxresdefault.webp",
+  "title": "Building PyTorch Computer Vision Algorithms for 100 Skin Shades",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=eGZ3-8ZbFj0"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/building-scientific-computing-infrastructure-software-with-the-pytorch-ecosystem-bharath-ramsundar.json b/pytorchconf-2024/videos/building-scientific-computing-infrastructure-software-with-the-pytorch-ecosystem-bharath-ramsundar.json
new file mode 100644
index 000000000..13b5d5982
--- /dev/null
+++ b/pytorchconf-2024/videos/building-scientific-computing-infrastructure-software-with-the-pytorch-ecosystem-bharath-ramsundar.json
@@ -0,0 +1,24 @@
+{
+  "description": "Building Scientific Computing Infrastructure Software with the PyTorch Ecosystem - Bharath Ramsundar, Deep Forest Sciences\n\nThe DeepChem library is a scientific computing library that implements deep learning infrastructure for drug discovery, materials discovery, and biology. The DeepChem community is one of the largest scientific open source projects built in PyTorch, with over 5K stars on Github and thousands of citations. The DeepChem community has learned a number of useful lessons for building and maintaining high quality scientific code built on top of PyTorch. In this talk, I will share our learnings with the PyTorch community and also highlight opportunities for improving scientific support in the ecosystem.",
+  "duration": 1685,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Bharath Ramsundar"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/Ru-CDK_79NQ/maxresdefault.webp",
+  "title": "Building Scientific Computing Infrastructure Software with the PyTorch Ecosystem",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=Ru-CDK_79NQ"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/data-dependent-shapes-in-pt2-edward-yang-meta.json b/pytorchconf-2024/videos/data-dependent-shapes-in-pt2-edward-yang-meta.json
new file mode 100644
index 000000000..0c3e776d7
--- /dev/null
+++ b/pytorchconf-2024/videos/data-dependent-shapes-in-pt2-edward-yang-meta.json
@@ -0,0 +1,24 @@
+{
+  "description": "Data-Dependent Shapes in PT2 - Edward Yang, Meta\n\nData-dependent shapes are ubiquitous whenever you want to take advantage of sparsity in your data representation, whether it is in recommendation systems, mixture of experts or other use cases. We have made a lot of improvements to torch.compile's support for capturing and compiling data dependent shapes, but they also require some user knowledge to work with effectively.\nThis talk will give an overview of PT2's facilities for data dependent compute and how to use them effectively.",
+  "duration": 1524,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Edward Yang"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/VYuFJUfyWSU/maxresdefault.webp",
+  "title": "Data-Dependent Shapes in PT2",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=VYuFJUfyWSU"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/dl-compiler-panel-discussion-p-tillet-j-ansel-j-pienaar-t-chen-m-zolotukhin-p-wu.json b/pytorchconf-2024/videos/dl-compiler-panel-discussion-p-tillet-j-ansel-j-pienaar-t-chen-m-zolotukhin-p-wu.json
new file mode 100644
index 000000000..35e32524b
--- /dev/null
+++ b/pytorchconf-2024/videos/dl-compiler-panel-discussion-p-tillet-j-ansel-j-pienaar-t-chen-m-zolotukhin-p-wu.json
@@ -0,0 +1,29 @@
+{
+  "description": "DL Compiler Panel Discussion - Philip Tillet, OpenAI; Jason Ansel, Meta; Jacques Pienaar, Google; Tianqi Chen, CMU & OctoAI; Mikhail Zolotukhin, Modular; Peng Wu, Meta\n\nSince the release of PyTorch 2 in 2023, torch.compile() has spurred significant new thinking around DL compiler designs at the framework level. In this session, we invite leaders in this space to share their insights based on real experiences of building DL compilers \u2013 Triton, TorchInductor, Halide, TVM, OpenXLA, and Mojo \u2013 and growing their ecosystems. We also invite a \u2018compiler user representative,\u2019 together.ai, to share their recent journey of redesigning the LLM inference stack around torch.compile(). Each leader will give a 10-minute lightning talk, followed by an engaging panel discussion.",
+  "duration": 2131,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Philip Tillet",
+    "Jason Ansel",
+    "Jacques Pienaar",
+    "Tianqi Chen",
+    "Mikhail Zolotukhin",
+    "Peng Wu"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/YWDzHGx8PrY/maxresdefault.webp",
+  "title": "DL Compiler Panel Discussion",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=YWDzHGx8PrY"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/executorch-beta-and-on-device-generative-ai-support-mergen-nachin-mengtao-martin-yuan-meta.json b/pytorchconf-2024/videos/executorch-beta-and-on-device-generative-ai-support-mergen-nachin-mengtao-martin-yuan-meta.json
new file mode 100644
index 000000000..01acf3a8d
--- /dev/null
+++ b/pytorchconf-2024/videos/executorch-beta-and-on-device-generative-ai-support-mergen-nachin-mengtao-martin-yuan-meta.json
@@ -0,0 +1,25 @@
+{
+  "description": "ExecuTorch Beta and on-Device Generative AI Support - Mergen Nachin & Mengtao (Martin) Yuan, Meta\n\nDuring this session, we will discuss real-life case studies focusing on the productionization of PyTorch models onto edge devices and welcome the community to begin adopting ExecuTorch. Since announcing the ExecuTorch MVP at the previous PTC, we have made significant progress in terms of stability, model coverage, accelerator performance, and developer experience, reaching a milestone that marks the transition to beta status. In addition to the above improvements, we continue to support generative AI models.\nSince the alpha launch that initially enabled support for Llama 2/3 models, we have now expanded our capabilities to include multimodal use cases and developed mobile demo apps showcasing these new features.",
+  "duration": 1213,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Mergen Nachin",
+    "Mengtao Yuan"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/45yNTi7c1Q0/maxresdefault.webp",
+  "title": "ExecuTorch Beta and on-Device Generative AI Support",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=45yNTi7c1Q0"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/hacks-to-make-llm-training-faster-daniel-han-unsloth-ai.json b/pytorchconf-2024/videos/hacks-to-make-llm-training-faster-daniel-han-unsloth-ai.json
new file mode 100644
index 000000000..58cd2c8bd
--- /dev/null
+++ b/pytorchconf-2024/videos/hacks-to-make-llm-training-faster-daniel-han-unsloth-ai.json
@@ -0,0 +1,24 @@
+{
+  "description": "Hacks to Make LLM Training Faster - Daniel Han, Unsloth AI\n\nAs open-source LLMs have become more capable, a substantial ecosystem has developed around the fine-tuning of these models. A thriving community of researchers, developers, practitioners and hobbyists has emerged which focuses on topics ranging from memory efficiency, parameter-efficient fine-tuning and quantization to performance at scale and reproducible evaluations. The goal of this mini-summit is to bring this community together to discuss ideas, share knowledge and build connections.\n\nThe agenda features a keynote from Joe Spisak on the state of the Llama ecosystem followed by invited talks from the founders of Axolotl, Unsloth and torchtune. We conclude the summit with a riveting discussion on what\u2019s next for LLMs, fine-tuning and the PyTorch ecosystem with a fabulous panel of experts - Tim Dettmers (author of bitsandbytes and QLoRA), Hailey Schoelkopf (maintainer of LM Eval Harness at EleutherAI), Aakanksha Chowdhery (Lead author on PaLM and Gemini) and Alexis Conneau (Research Lead at OpenAI).",
+  "duration": 477,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Daniel Han"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/PdtKkc5jB4g/maxresdefault.webp",
+  "title": "Hacks to Make LLM Training Faster",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=PdtKkc5jB4g"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/halide-a-halide-backend-for-torchinductor-jason-ansel-meta.json b/pytorchconf-2024/videos/halide-a-halide-backend-for-torchinductor-jason-ansel-meta.json
new file mode 100644
index 000000000..edbf1fe34
--- /dev/null
+++ b/pytorchconf-2024/videos/halide-a-halide-backend-for-torchinductor-jason-ansel-meta.json
@@ -0,0 +1,24 @@
+{
+  "description": "[HALIDE] A Halide Backend for TorchInductor - Jason Ansel, Meta\n\nThis talk will focus on a new Halide backend for TorchInductor, which is in addition to the existing Triton and C++ backends. The Halide backend is meant to serve as a reference backend to make it easier to extend TorchInductor to support new backend compilers and hardware devices.\nHalide has been the inspiration (either in ideas or through forking) of numerous other compiler projects, so it is a good starting point for adding new backends that follow a Halide-like model.",
+  "duration": 489,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Jason Ansel"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/hCvlWZEXRrg/maxresdefault.webp",
+  "title": "[HALIDE] A Halide Backend for TorchInductor",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=hCvlWZEXRrg"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/implementing-a-custom-torch-compile-backend-a-case-study-maanav-dalal-yulong-wang-microsoft.json b/pytorchconf-2024/videos/implementing-a-custom-torch-compile-backend-a-case-study-maanav-dalal-yulong-wang-microsoft.json
new file mode 100644
index 000000000..ca9f37341
--- /dev/null
+++ b/pytorchconf-2024/videos/implementing-a-custom-torch-compile-backend-a-case-study-maanav-dalal-yulong-wang-microsoft.json
@@ -0,0 +1,25 @@
+{
+  "description": "Implementing a Custom Torch.Compile Backend - A Case Study - Maanav Dalal & Yulong Wang, Microsoft\n\nThis presentation will dive into the development of the ONNXRuntime (ORT) backend for torch.compile. We'll cover the implementation process, starting with a PyTorch 2.0 generated FX graph, highlighting the unique challenges encountered when serving ORT-specific scenarios and how we solved them. Attendees will gain insights into optimizing performance, overcoming integration hurdles, and achieving efficient execution. Whether you're a developer looking to extend PyTorch's capabilities for your own use cases, keen to learn about ONNX Runtime, or interested in backend performance optimization and the many steps we've taken to get to where we are now, this session promises valuable takeaways and practical knowledge.",
+  "duration": 666,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Maanav Dalal",
+    "Yulong Wang"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/MnNO_13fLtU/maxresdefault.webp",
+  "title": "Implementing a Custom Torch.Compile Backend - A Case Study",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=MnNO_13fLtU"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/intel-gpu-in-upstream-pytorch-expanding-gpu-choices-and-enhancing-back-eikan-wang-min-jean-cho.json b/pytorchconf-2024/videos/intel-gpu-in-upstream-pytorch-expanding-gpu-choices-and-enhancing-back-eikan-wang-min-jean-cho.json
new file mode 100644
index 000000000..9bd2023e5
--- /dev/null
+++ b/pytorchconf-2024/videos/intel-gpu-in-upstream-pytorch-expanding-gpu-choices-and-enhancing-back-eikan-wang-min-jean-cho.json
@@ -0,0 +1,25 @@
+{
+  "description": "Intel GPU in Upstream PyTorch: Expanding GPU Choices and Enhancing Backend Flexibility - Eikan Wang & Min Jean Cho, Intel\n\nThe integration of Intel GPU support into PyTorch marks a pivotal enhancement for PyTorch device and runtime. We generalized the PyTorch device and runtime to accommodate streaming devices. The generalization not only facilitates the deployment of PyTorch on ubiquitous hardware but also makes the integration of different HW backends easier.\nIn addition, PyTorch with Intel GPU supports various Intel GPUs from the data center to the client, enriching and democratizing the PyTorch HW ecosystem. Particularly in AIPC scenarios where Intel's integrated and discrete GPUs are prevalent, PyTorch with Intel GPU can deliver promising performance and an improved OOB experience, which can extend PyTorch's applicability significantly.",
+  "duration": 1398,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Eikan Wang",
+    "Min Jean Cho"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/HRdda_kVEh4/maxresdefault.webp",
+  "title": "Intel GPU in Upstream PyTorch: Expanding GPU Choices and Enhancing Backend Flexibility",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=HRdda_kVEh4"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-building-an-advanced-knowledge-assistant-jerry-liu-co-founder-ceo-llamaindex.json b/pytorchconf-2024/videos/keynote-building-an-advanced-knowledge-assistant-jerry-liu-co-founder-ceo-llamaindex.json
new file mode 100644
index 000000000..e4a3a61cd
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-building-an-advanced-knowledge-assistant-jerry-liu-co-founder-ceo-llamaindex.json
@@ -0,0 +1,24 @@
+{
+  "description": "Keynote: Building an Advanced Knowledge Assistant - Jerry Liu, Co-Founder & CEO, LlamaIndex\n\nA huge promise for LLMs is being able to answer questions and solve tasks of arbitrary complexity over an arbitrary number of data sources. The world has started to shift from simple RAG stacks, which are mostly good for answering pointed questions, to agents that can more autonomously reason over a diverse set of inputs, and interleave retrieval and tool use to produce sophisticated outputs.\n\nBuilding a reliable multi-agent system is challenging. There's a core question of developer ergonomics and production deployment - what makes sense outside a notebook setting.\nIn this talk we outline some core building blocks for building advanced research assistants, including advanced RAG modules, event-driven workflow orchestration, and more.",
+  "duration": 896,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Jerry Liu"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/EjGdum0UdaI/maxresdefault.webp",
+  "title": "Keynote: Building an Advanced Knowledge Assistant",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=EjGdum0UdaI"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-community-awards.json b/pytorchconf-2024/videos/keynote-community-awards.json
new file mode 100644
index 000000000..0c0f7b6ce
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-community-awards.json
@@ -0,0 +1,22 @@
+{
+  "description": "Keynote: Community Awards",
+  "duration": 359,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/1dSVYKPk3N0/maxresdefault.webp",
+  "title": "Keynote: Community Awards",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=1dSVYKPk3N0"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-enabling-generative-ai-on-the-edge-cormac-brick-principal-engineer-google.json b/pytorchconf-2024/videos/keynote-enabling-generative-ai-on-the-edge-cormac-brick-principal-engineer-google.json
new file mode 100644
index 000000000..b3e7764fe
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-enabling-generative-ai-on-the-edge-cormac-brick-principal-engineer-google.json
@@ -0,0 +1,24 @@
+{
+  "description": "Keynote: Enabling Generative AI on the Edge - Cormac Brick, Principal Engineer, Google\n\nGenerative AI is no longer just in the cloud - recently it's also getting deployed on edge devices. A disruptive goal of this work is AI-powered applications that respond instantly, work offline, and protect user privacy by processing data locally. In this talk, we'll explore the cutting edge of edge-based generative AI, showcasing open models that are pushing the boundaries of what's possible today on the edge.\nWe'll dive deep into the PyTorch ecosystem, looking at projects that are making it easier than ever to author, optimize, and deploy these models across a wide range of devices.",
+  "duration": 848,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Cormac Brick"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/n2DsEyLbuJg/maxresdefault.webp",
+  "title": "Keynote: Enabling Generative AI on the Edge",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=n2DsEyLbuJg"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-navigating-the-architectural-timeline-of-llms-sebastian-raschka-lightning-ai.json b/pytorchconf-2024/videos/keynote-navigating-the-architectural-timeline-of-llms-sebastian-raschka-lightning-ai.json
new file mode 100644
index 000000000..5b476a885
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-navigating-the-architectural-timeline-of-llms-sebastian-raschka-lightning-ai.json
@@ -0,0 +1,24 @@
+{
+  "description": "Keynote: Navigating the Architectural Timeline of LLMs - Sebastian Raschka, Staff Research Engineer, Lightning AI\n\nThe evolution of large language models (LLMs) from the original Generative Pre-trained Transformer (GPT) series to the recent advancements seen in models like Llama 3 has been accompanied by several architectural and methodological innovations. This talk aims to catch attendees up on the latest AI and LLM development trends, highlighting the key changes and motivations that led to the development of recent state-of-the-art LLMs, such as Llama 3.1.\n\nSpecifically, this presentation explores key developments in attention mechanisms, such as sliding window attention, group query, multi-query attention, and FlashAttention, and explains their key motivations and advantages. In addition to exploring the structural changes, this presentation also reviews the recent \"tricks of the trade\" that have improved the training processes and performance of the latest LLMs.\nThis includes the recent two-step pretraining approach in Llama 3.1 and applying knowledge distillation techniques using real data, as seen in Gemma 2, and synthetic data, as seen in Llama 3.1.\n\nMoreover, we will also examine the integration of system-level optimizations, such as the Mixture of Experts method and the hybrid model Samba, which combines Mamba techniques with attention mechanisms and illustrates a broader trend toward more specialized and efficient architectures.\n\nThis talk will provide attendees with an understanding of the most notable transformations that have defined the architectural timeline of LLMs.",
+  "duration": 925,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Sebastian Raschka"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/frkAt-gZVjc/maxresdefault.webp",
+  "title": "Keynote: Navigating the Architectural Timeline of LLMs",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=frkAt-gZVjc"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-open-language-models-olmo-accelerating-the-science-of-language-modeling-hanna-hajishirzi.json b/pytorchconf-2024/videos/keynote-open-language-models-olmo-accelerating-the-science-of-language-modeling-hanna-hajishirzi.json
new file mode 100644
index 000000000..548cf4d1c
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-open-language-models-olmo-accelerating-the-science-of-language-modeling-hanna-hajishirzi.json
@@ -0,0 +1,24 @@
+{
+  "description": "Keynote: Open Language Models (OLMo): Accelerating the Science of Language Modeling - Hanna Hajishirzi, Senior Director NLP Research, Allen Institute for AI\n\nOver the past few years, and especially since the deployment of ChatGPT in November 2022, neural language models with billions of parameters and trained on trillions of words are powering the fastest-growing computing applications in history and generating discussion and debate across society. However, AI scientists cannot study or improve those state-of-the-art models because the models' parameters, training data, code, and even documentation are not openly available. In this talk, I present our OLMo project toward building strong language models and making them fully open to researchers along with open-source code for data management, training, inference, and interaction.\nIn particular, I describe DOLMa, a 3T token open dataset curated for training language models, Tulu, our instruction-tuned language model, and OLMo v1, a fully-open 7B parameter language model trained from scratch.",
+  "duration": 1036,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Hanna Hajishirzi"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/GYWCYU-Xmnk/maxresdefault.webp",
+  "title": "Keynote: Open Language Models (OLMo): Accelerating the Science of Language Modeling",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=GYWCYU-Xmnk"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-panel-discussion-responsible-ai-k-rooney-k-varshney-s-hooker-a-madry-r-bommasani.json b/pytorchconf-2024/videos/keynote-panel-discussion-responsible-ai-k-rooney-k-varshney-s-hooker-a-madry-r-bommasani.json
new file mode 100644
index 000000000..fd53115a7
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-panel-discussion-responsible-ai-k-rooney-k-varshney-s-hooker-a-madry-r-bommasani.json
@@ -0,0 +1,28 @@
+{
+  "description": "Keynote Panel Discussion: Responsible AI - Kate Rooney, CNBC; Kush Varshney, IBM T. J. Watson Research Center; Sara Hooker, C4AI; Aleksander Madry, OpenAI; Rishi Bommasani, Stanford University",
+  "duration": 1832,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Kate Rooney",
+    "Kush Varshney",
+    "Sara Hooker",
+    "Aleksander Madry",
+    "Rishi Bommasani"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi/FLUd0k2ctJE/maxresdefault.jpg",
+  "title": "Keynote Panel Discussion: Responsible AI",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=FLUd0k2ctJE"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-panel-discussion-scaling-benchmarking.json b/pytorchconf-2024/videos/keynote-panel-discussion-scaling-benchmarking.json
new file mode 100644
index 000000000..5c3f06529
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-panel-discussion-scaling-benchmarking.json
@@ -0,0 +1,29 @@
+{
+  "description": "Keynote Panel Discussion: Scaling & Benchmarking",
+  "duration": 2023,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Anastasios Angelopoulos",
+    "Lisa Dunlap",
+    "James Bradbury",
+    "Tri Dao",
+    "Aparna Ramani",
+    "Soumith Chintala"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/01a4tBLEzVM/maxresdefault.webp",
+  "title": "Keynote Panel Discussion: Scaling & Benchmarking",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=01a4tBLEzVM"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-pytorch-technical-deep-dive-p-bialecki-p-wu-w-constable-k-khandelwal-m-yuan.json b/pytorchconf-2024/videos/keynote-pytorch-technical-deep-dive-p-bialecki-p-wu-w-constable-k-khandelwal-m-yuan.json
new file mode 100644
index 000000000..5dff0a593
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-pytorch-technical-deep-dive-p-bialecki-p-wu-w-constable-k-khandelwal-m-yuan.json
@@ -0,0 +1,28 @@
+{
+  "description": "Keynote: PyTorch Technical Deep Dive - Piotr Bialecki, NVIDIA; Peng Wu, Will Constable, Kartikay Khandelwal & Mengtao (Martin) Yuan, Meta\n\nThis Deep Dive provides an update on PyTorch development since last conference and dives into the key new features coming in PyTorch 2.5 and beyond. We will explore how advancements across a number of PyTorch features combine to better support the full model development lifecycle across training, fine-tuning, and deployment.",
+  "duration": 3055,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Piotr Bialecki",
+    "Peng Wu",
+    "Will Constable",
+    "Kartikay Khandelwal",
+    "Mengtao Yuan"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/d29dGW8FafI/maxresdefault.webp",
+  "title": "Keynote: PyTorch Technical Deep Dive",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=d29dGW8FafI"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-ray-a-distributed-framework-for-heterogeneous-computing-ion-stoica-uc-berkeley.json b/pytorchconf-2024/videos/keynote-ray-a-distributed-framework-for-heterogeneous-computing-ion-stoica-uc-berkeley.json
new file mode 100644
index 000000000..a318524d5
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-ray-a-distributed-framework-for-heterogeneous-computing-ion-stoica-uc-berkeley.json
@@ -0,0 +1,24 @@
+{
+  "description": "Keynote: Ray: A Distributed Framework for Heterogeneous Computing - Ion Stoica, Professor, UC Berkeley\n\nRay has recently become the framework of choice for scaling machine learning workloads\u2014from data preprocessing, to training, fine-tuning, and serving. This talk will highlight Ray\u2019s key features responsible for its flexibility and generality, as well as its recent support for GPUs.",
+  "duration": 1006,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Ion Stoica"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/E98Pv36f0YI/maxresdefault.webp",
+  "title": "Keynote: Ray: A Distributed Framework for Heterogeneous Computing",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=E98Pv36f0YI"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-welcome-back-opening-remarks.json b/pytorchconf-2024/videos/keynote-welcome-back-opening-remarks.json
new file mode 100644
index 000000000..c0cccf9e7
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-welcome-back-opening-remarks.json
@@ -0,0 +1,22 @@
+{
+  "description": "Keynote: Welcome Back & Opening Remarks",
+  "duration": 160,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/fabW4B5IlVg/maxresdefault.webp",
+  "title": "Keynote: Welcome Back & Opening Remarks",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=fabW4B5IlVg"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-welcome-opening-remarks-matt-white-executive-director-pytorch-foundation.json b/pytorchconf-2024/videos/keynote-welcome-opening-remarks-matt-white-executive-director-pytorch-foundation.json
new file mode 100644
index 000000000..70c76672a
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-welcome-opening-remarks-matt-white-executive-director-pytorch-foundation.json
@@ -0,0 +1,24 @@
+{
+  "description": "Keynote: Welcome & Opening Remarks - Matt White, Executive Director, PyTorch Foundation",
+  "duration": 626,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Matt White"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/9VFHuGaq7SU/maxresdefault.webp",
+  "title": "Keynote: Welcome & Opening Remarks",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=9VFHuGaq7SU"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/keynote-why-you-should-think-twice-before-paying-for-an-evaluation-tool-chip-huyen-voltron-data.json b/pytorchconf-2024/videos/keynote-why-you-should-think-twice-before-paying-for-an-evaluation-tool-chip-huyen-voltron-data.json
new file mode 100644
index 000000000..2cd66a8ca
--- /dev/null
+++ b/pytorchconf-2024/videos/keynote-why-you-should-think-twice-before-paying-for-an-evaluation-tool-chip-huyen-voltron-data.json
@@ -0,0 +1,24 @@
+{
+  "description": "Keynote: Why You Should Think Twice Before Paying for an Evaluation Tool - Chip Huyen, VP of AI & OSS, Voltron Data\n\nOpen-ended evaluation is hard, and the number of evaluation tools has exploded in response to this challenge. However, if tools could solve evaluation, evaluation would have been solved by now.\nWhile the right tools can make your life easier, this talk discusses why you should think twice before outsourcing your evaluation to an external tool.",
+  "duration": 949,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Chip Huyen"
+  ],
+  "tags": [],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/IbKKPeuieFw/maxresdefault.webp",
+  "title": "Keynote: Why You Should Think Twice Before Paying for an Evaluation Tool",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=IbKKPeuieFw"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-a-whirlwind-tour-of-pytorch-extension-points-alban-desmaison-meta.json b/pytorchconf-2024/videos/lightning-talk-a-whirlwind-tour-of-pytorch-extension-points-alban-desmaison-meta.json
new file mode 100644
index 000000000..b6e919bd0
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-a-whirlwind-tour-of-pytorch-extension-points-alban-desmaison-meta.json
@@ -0,0 +1,26 @@
+{
+  "description": "Journey across the PyTorch stack and see all the extension points that exist from nn.Module to the c++ Dispatcher through autograd and subclasses. This session will cover example use cases and when each one should be used, while pointing to references for in-depth details.",
+  "duration": 848,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Alban Desmaison"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/1A54cUPXhZ4/maxresdefault.webp",
+  "title": "A Whirlwind Tour of PyTorch Extension Points",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=1A54cUPXhZ4"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-aotriton-ahead-of-time-triton-kernel-libraries-on-rocm-jeff-daily-amd.json b/pytorchconf-2024/videos/lightning-talk-aotriton-ahead-of-time-triton-kernel-libraries-on-rocm-jeff-daily-amd.json
new file mode 100644
index 000000000..3290f88b7
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-aotriton-ahead-of-time-triton-kernel-libraries-on-rocm-jeff-daily-amd.json
@@ -0,0 +1,26 @@
+{
+  "description": "Scaled dot product attention provides significant acceleration of the transformer layer through fusion of the multihead attention layer. There are several different algorithms to achieve this but tiled attention through scaled dot product attention via Flash Attention is a very popular approach. In PyTorch on the ROCm platform this is currently achieved through ahead of time compiled (AOT) Triton kernels in a linkable archive. AMD\u2019s work to enable and package these kernels is done through AOTriton, which aims to use Triton\u2019s compiler and GPU kernels for faster development. AOTriton maintains an optimized set of tiling sizes and other parameters to provide optimized, pre-compiled Triton kernels. The differences between JIT and AOT are few but are very important. Despite this, prototyping kernels in Triton is much faster than template-based C++ libraries.\nIn this presentation we will go into detail on the interaction layer between PyTorch and AOTriton, the structure of AOTriton and how to add new Triton kernels to AOTriton.",
+  "duration": 692,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Jeff Daily"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/bhplJt1XAMI/maxresdefault.webp",
+  "title": "AOTriton: Ahead of Time Triton Kernel Libraries on ROCm",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=bhplJt1XAMI"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-beyond-zero-eliminating-vulnerabili-patrick-smyth-dan-fernandez-srishti-hegde.json b/pytorchconf-2024/videos/lightning-talk-beyond-zero-eliminating-vulnerabili-patrick-smyth-dan-fernandez-srishti-hegde.json
new file mode 100644
index 000000000..d8c7b3fe8
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-beyond-zero-eliminating-vulnerabili-patrick-smyth-dan-fernandez-srishti-hegde.json
@@ -0,0 +1,28 @@
+{
+  "description": "Container images are increasingly the future of production applications at scale, providing reproducibility, robustness, and transparency. As PyTorch images get deployed to production, however, security becomes a major concern. PyTorch has a large attack surface, and building secure PyTorch images can be a challenge. Currently, the official PyTorch runtime container image has 30 CVEs (known vulnerabilities) rated critical and 256 CVEs rated high. Improving this situation could secure many deployments that incorporate PyTorch for cloud-based inference or training. In this fast-paced session, we'll take a deep dive on the official PyTorch image from a vulnerability mitigation perspective, looking hard at included packages, executables, and active CVEs. We'll identify low-hanging fruit for increasing security, including stripping bloat and building fresh. We'll also talk about the next level of security practiced in Chainguard's PyTorch image builds, such as including SBOMs and going distroless.\nFinally, we'll consider emerging tools and approaches for analyzing AI artifacts such as models and how these systems can benefit PyTorch in production.",
+  "duration": 736,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Patrick Smyth",
+    "Dan Fernandez",
+    "Srishti Hegde"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/1klynk1dxYA/maxresdefault.webp",
+  "title": "Beyond Zero: Eliminating Vulnerabilities in PyTorch Container Images",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=1klynk1dxYA"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-building-and-supporting-the-chinese-pytorch-community-resources-tu-zong-zesheng.json b/pytorchconf-2024/videos/lightning-talk-building-and-supporting-the-chinese-pytorch-community-resources-tu-zong-zesheng.json
new file mode 100644
index 000000000..2b50f461a
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-building-and-supporting-the-chinese-pytorch-community-resources-tu-zong-zesheng.json
@@ -0,0 +1,26 @@
+{
+  "description": "This proposal aims to provide a comprehensive introduction to the Chinese PyTorch community; we hope to inspire more users to join and contribute, fostering a vibrant and inclusive environment for PyTorch enthusiasts in China.\n\nChinese PyTorch Homepage: an introduction to the official Chinese version of the PyTorch website, highlighting its features, with navigation tips and key sections such as documentation, tutorials, and community events, improving the connection of users in China with the PyTorch community.\n\nLocalized Tutorials and Documentation: the 2.x releases had no translated version, making it hard for beginners who are not fluent in English to catch up with the latest PyTorch features. We translated the official documents and tutorials, covering everything from basic PyTorch concepts to advanced applications.\n\nInteractive Tutorials: previously there were no interactive tutorials (like Google Colab) for Chinese students or beginners, who had to set up an environment before starting with PyTorch, which can be hard for beginners. Now, online notebooks and tutorials are available for beginners to practice and tune their steps.",
+  "duration": 453,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Zong Zesheng"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/AXMN5oUyWKE/maxresdefault.webp",
+  "title": "Building and Supporting the Chinese PyTorch Community: Resources, Tutorials, and Engagement",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=AXMN5oUyWKE"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-d-matrix-llm-compression-flow-based-on-torch-fx-simplify-zifei-xu-tristan-webb.json b/pytorchconf-2024/videos/lightning-talk-d-matrix-llm-compression-flow-based-on-torch-fx-simplify-zifei-xu-tristan-webb.json
new file mode 100644
index 000000000..eb573b7f9
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-d-matrix-llm-compression-flow-based-on-torch-fx-simplify-zifei-xu-tristan-webb.json
@@ -0,0 +1,27 @@
+{
+  "description": "We introduce dmx-compressor, d-Matrix's open-source LLM compression toolkit that is modular, robust, efficient, and user-friendly.\nIt utilizes symbolic tracing and fx.Transformer for network compression while keeping the model a first-class citizen in PyTorch for the user, despite prevalent graph dynamism in LLMs. It achieves this by maintaining both the original nn.Module and a just-in-time (JIT) traced and transformed fx.GraphModule representation behind the scenes, in conjunction with an abstraction that cleanly decouples network compression from the original model graph definition. This design allows the FXIR to dynamically adapt to diverse forward call signatures and flow-control arguments throughout quantization-aware training and post-training quantization written in plain PyTorch, yielding a compressed FXIR fully compatible with application-level APIs like the Hugging Face pipeline. We also provide a graph visualizer based on fx.Interpreter for ease of debugging. We believe this project shall empower the community to build efficient LLMs for deployment on custom hardware accelerators and contribute to the PyTorch ecosystem.",
+  "duration": 822,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Zifei Xu",
+    "Tristan Webb"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/cw6dSlmMwj8/maxresdefault.jpg",
+  "title": "d-Matrix LLM Compression Flow Based on Torch.Fx: Simplifying PTQ/QAT",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=cw6dSlmMwj8"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-debiasing-the-data-lifecycle-shailvi-wakhlu-shailvi-ventures-llc.json b/pytorchconf-2024/videos/lightning-talk-debiasing-the-data-lifecycle-shailvi-wakhlu-shailvi-ventures-llc.json
new file mode 100644
index 000000000..e10f0f41a
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-debiasing-the-data-lifecycle-shailvi-wakhlu-shailvi-ventures-llc.json
@@ -0,0 +1,26 @@
+{
+  "description": "Biased data results in biased decision-making. Making sure that at every step of the data lifecycle, we make conscious attempts to debias the data is an important responsibility for all data scientists. In this talk, I highlight the typical data lifecycle, and how to prevent biases at every step. ---- The key takeaways from my talk include: 1) Understanding the data lifecycle 2) What are the typical ways biases creep in 3) How we can proactively prevent and fix biases in data",
+  "duration": 661,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Shailvi Wakhlu"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi/V9NVEceY9Wg/maxresdefault.jpg",
+  "title": "Debiasing the Data Lifecycle",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=V9NVEceY9Wg"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-distributing-a-million-open-models-in-the-wild-lessons-learned-f-omar-sanseviero.json b/pytorchconf-2024/videos/lightning-talk-distributing-a-million-open-models-in-the-wild-lessons-learned-f-omar-sanseviero.json
new file mode 100644
index 000000000..08d117ca1
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-distributing-a-million-open-models-in-the-wild-lessons-learned-f-omar-sanseviero.json
@@ -0,0 +1,26 @@
+{
+  "description": "The Hugging Face Hub has over 300,000 PyTorch models.\nDistributing such a number of models poses challenges. In this talk, Omar will share how the community has tackled these challenges, including techniques to ensure torch model security and tooling for researchers to share their models. He'll also take attendees on a journey through the evolution of torch models distributed by the community, highlighting new trends and directions. Attending this talk will give attendees practical insights into the latest developments in model distribution and ecosystem trends.",
+  "duration": 579,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Omar Sanseviero"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/t3AHZzz8Ong/maxresdefault.webp",
+  "title": "Distributing a Million Open Models in the Wild: Lessons Learned from the Hugging Face Hub",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=t3AHZzz8Ong"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-empowering-developers-tools-and-resources-for-running-generative-a-pareena-verma.json b/pytorchconf-2024/videos/lightning-talk-empowering-developers-tools-and-resources-for-running-generative-a-pareena-verma.json
new file mode 100644
index 000000000..2eff22565
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-empowering-developers-tools-and-resources-for-running-generative-a-pareena-verma.json
@@ -0,0 +1,30 @@
+{
+  "description": "As the demand for accessible and scalable AI solutions grows, leveraging CPUs for generative AI offers significant advantages in cost, energy efficiency and widespread availability. This session aims to equip developers with the ecosystem of tools, resources and technical content needed to effectively run generative AI use cases on Arm CPUs. We have launched a range of easily digestible tutorials for developers, part of our Learning Paths on https://learn.arm.com/, which demonstrate how you can easily and efficiently run small and large language models on Arm-based devices. Learn about end-to-end workflows to accelerate PyTorch based sentiment analysis models from Hugging Face on Arm servers with optimizations in Arm Compute Library kernels for fp32 and bfloat16. Use the new KleidiAI library to accelerate LLMs with AI frameworks and build an Android chat app on your Arm mobile device with ExecuTorch and XNNPACK. Find out about our roadmap for learning content demonstrating the feasibility and successful deployment of generative AI on Arm-based devices.\nHelp us shape the support that we offer developers.",
+  "duration": 738,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    },
+    {
+      "label": "https://learn.arm.com/",
+      "url": "https://learn.arm.com/"
+    }
+  ],
+  "speakers": [
+    "Pareena Verma"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/wkzBHF9CL5s/maxresdefault.webp",
+  "title": "Empowering Developers: Tools and Resources for Running Generative AI on Arm CPUs",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=wkzBHF9CL5s"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-extending-pytorch-with-custom-python-c-cuda-operators-richard-zou-meta.json b/pytorchconf-2024/videos/lightning-talk-extending-pytorch-with-custom-python-c-cuda-operators-richard-zou-meta.json
new file mode 100644
index 000000000..2104544ca
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-extending-pytorch-with-custom-python-c-cuda-operators-richard-zou-meta.json
@@ -0,0 +1,26 @@
+{
+  "description": "In this talk, we'll go over the new recommended APIs to extend PyTorch with custom Python/C++/CUDA operators. Users have been able to extend PyTorch with custom operators for years but we have updated our guidance for creating custom operators that compose with torch.compile, autograd, and other PyTorch subsystems.",
+  "duration": 511,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Richard Zou"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/LI3h8aVchwo/maxresdefault.webp",
+  "title": "Extending PyTorch with Custom Python/C++/CUDA Operators",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=LI3h8aVchwo"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-fast-scalable-distributed-training-with-streamingdataset-saaketh-narayan.json b/pytorchconf-2024/videos/lightning-talk-fast-scalable-distributed-training-with-streamingdataset-saaketh-narayan.json
new file mode 100644
index 000000000..77a2f3d65
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-fast-scalable-distributed-training-with-streamingdataset-saaketh-narayan.json
@@ -0,0 +1,26 @@
+{
+  "description": "StreamingDataset makes training on large datasets from cloud storage as fast, cheap, and scalable as possible. It\u2019s specially designed for multi-node, distributed training for large models \u2014 maximizing correctness guarantees, performance, and ease of use. Key features include elastically deterministic training, instant mid-epoch resumption, effective shuffling, high training throughput, and flexible data mixing, among other features. When training with StreamingDataset, the data shards are written to cloud storage in MDS, our file format that allows for low-latency random access to samples. By being as efficient as possible with shard downloads and shuffling, StreamingDataset minimizes egress costs while ensuring that dataloading never bottlenecks model training. StreamingDataset powers training for models ranging from LLMs with over 100 billion parameters like DBRX, to advanced diffusion models, to two-tower recommendation models, and more, scaling to training jobs on thousands of GPUs with ease.\nJoin us to learn how StreamingDataset can elevate your distributed model training experience.",
+  "duration": 423,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Saaketh Narayan"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/C1TpUZjqTfA/maxresdefault.webp",
+  "title": "Fast, Scalable Distributed Training with StreamingDataset",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=C1TpUZjqTfA"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-flexattention-the-flexibility-of-pytorch-the-performa-yanbo-liang-horace-he.json b/pytorchconf-2024/videos/lightning-talk-flexattention-the-flexibility-of-pytorch-the-performa-yanbo-liang-horace-he.json
new file mode 100644
index 000000000..134926283
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-flexattention-the-flexibility-of-pytorch-the-performa-yanbo-liang-horace-he.json
@@ -0,0 +1,27 @@
+{
+  "description": "Introducing a novel abstraction leveraging the PyTorch compiler stack to enable custom, user-defined attention mechanisms. This new API supports dynamic modifications to attention scores within SDPA, providing both runtime and memory efficiency through kernel fusion with the FlashAttention algorithm.",
+  "duration": 1060,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Yanbo Liang",
+    "Horace He"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/ju-KlcuWlbk/maxresdefault.webp",
+  "title": "FlexAttention - The Flexibility of PyTorch + The Performance of FlashAttention",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=ju-KlcuWlbk"
+    }
+  ]
+}
diff --git a/pytorchconf-2024/videos/lightning-talk-hieroglyph2text-a-pytorch-powered-pipeline-for-automated-egyptian-h-susi-gentsch.json b/pytorchconf-2024/videos/lightning-talk-hieroglyph2text-a-pytorch-powered-pipeline-for-automated-egyptian-h-susi-gentsch.json
new file mode 100644
index 000000000..d58aa176f
--- /dev/null
+++ b/pytorchconf-2024/videos/lightning-talk-hieroglyph2text-a-pytorch-powered-pipeline-for-automated-egyptian-h-susi-gentsch.json
@@ -0,0 +1,26 @@
+{
+  "description": "HieroGlyph2Text is an innovative PyTorch-powered pipeline that automates the detection and classification of Egyptian hieroglyphs from large image inputs and attempts their translation. It addresses the challenge of decoding and translating ancient hieroglyphic inscriptions, traditionally a time-consuming and specialized task. This pipeline leverages PyTorch to create custom models: 1. Object Detection: YOLOv8 accurately detects individual hieroglyphs within images. 2. Image Classification: A custom ResNet model built using PyTorch achieves state-of-the-art accuracy in assigning Gardiner Codes to hieroglyphs. 3. Translation: The classified Gardiner Code outputs from the ResNet model are integrated with Llama3, a large language model (LLM), using Retrieval-Augmented Generation (RAG) and a custom dataset based upon Gardiner Codes and their respective descriptions and ideograms. Key highlights include accurate hieroglyph detection and state-of-the-art classification performance through an optimized ResNet model.\nThis pipeline lays the groundwork for collaboration with subject matter experts to refine the translation process and democratize access to ancient Egyptian hieroglyphic knowledge.",
+  "duration": 564,
+  "language": "eng",
+  "recorded": "2024-09-18",
+  "related_urls": [
+    {
+      "label": "Conference Website",
+      "url": "https://pytorch.org/event/pytorch-conference-2024/"
+    }
+  ],
+  "speakers": [
+    "Susi Gentsch"
+  ],
+  "tags": [
+    "Lightning Talk"
+  ],
+  "thumbnail_url": "https://i.ytimg.com/vi_webp/eDjHKOb9Tn4/maxresdefault.webp",
+  "title": "HieroGlyph2Text: A PyTorch-Powered Pipeline for Automated Egyptian Hieroglyph Translation from Image",
+  "videos": [
+    {
+      "type": "youtube",
+      "url": "https://www.youtube.com/watch?v=eDjHKOb9Tn4"
+    }
+  ]
+}
This pipeline lays the groundwork for collaboration with subject matter experts to refine the translation process and democratize access to ancient Egyptian hieroglyphic knowledge.", + "duration": 564, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Susi Gentsch" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/eDjHKOb9Tn4/maxresdefault.webp", + "title": "HieroGlyph2Text: A PyTorch-Powered Pipeline for Automated Egyptian Hieroglyph Translation from Image", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=eDjHKOb9Tn4" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-implementing-and-using-iterable-datasets-what-could-go-wrong-nicolas-hug-meta.json b/pytorchconf-2024/videos/lightning-talk-implementing-and-using-iterable-datasets-what-could-go-wrong-nicolas-hug-meta.json new file mode 100644 index 000000000..345b526e7 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-implementing-and-using-iterable-datasets-what-could-go-wrong-nicolas-hug-meta.json @@ -0,0 +1,26 @@ +{ + "description": "PyTorch supports two kinds of datasets: Iterable datasets and indexable \"map-style\" datasets. Iterable datasets can be more flexible and potentially faster than their indexable cousins. They are also much harder to use correctly, and can easily lead to silently wrong results. This talk is a quick and fun intro to some of the traps that Iterable datasets lay out for you, with some tips to help you avoid them.", + "duration": 674, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Nicolas Hug" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/Kx0BoPa9juQ/maxresdefault.webp", + "title": "Implementing and Using Iterable Datasets: What Could Go Wrong?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Kx0BoPa9juQ" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-in-transit-machine-learning-using-pytorch-on-frontier-exascale-system-vineeth-gutta.json b/pytorchconf-2024/videos/lightning-talk-in-transit-machine-learning-using-pytorch-on-frontier-exascale-system-vineeth-gutta.json new file mode 100644 index 000000000..bdd6864cd --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-in-transit-machine-learning-using-pytorch-on-frontier-exascale-system-vineeth-gutta.json @@ -0,0 +1,26 @@ +{ + "description": "Traditional ML workflows use offline training, where the data is stored on disk and subsequently loaded into accelerator (CPU, GPU, etc.) memory during training or inference. We recently devised a novel and scalable in-transit ML workflow for a plasma-physics application (chosen as 1 of 8 compelling codes in the country for the world\u2019s fastest supercomputer, Frontier) with the aim of building a high-energy laser particle accelerator. Distributed HPC systems like Frontier generate volumes of data that are infeasible to store on HPC file systems; the high volume and rate of data generation create a mismatch with modern memory hierarchies. Our novel ML workflow utilizes continuous learning, where the data is consumed in batches as the simulation produces it and discarded after each batch is trained.
This in-transit workflow integrates particle-in-cell simulations with distributed ML training in PyTorch using DDP, an application coupling that enables the model to learn correlations between emitted radiation and particle dynamics within the simulation in an unsupervised manner. This workflow is demonstrated at scale on Frontier using 400 AMD MI250X GPUs.", + "duration": 725, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Vineeth Gutta" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/s4Gh2UX0EyA/maxresdefault.webp", + "title": "In-Transit Machine Learning Using PyTorch on Frontier Exascale System", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=s4Gh2UX0EyA" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-introduction-to-torch-distributed-pipelining-howard-huang-ke-wen-meta.json b/pytorchconf-2024/videos/lightning-talk-introduction-to-torch-distributed-pipelining-howard-huang-ke-wen-meta.json new file mode 100644 index 000000000..bb3955b38 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-introduction-to-torch-distributed-pipelining-howard-huang-ke-wen-meta.json @@ -0,0 +1,27 @@ +{ + "description": "Pipeline parallelism is a technique employed in distributed deep learning that enhances model execution by dividing the model into distinct segments, or \"stages.\" As large language models and other memory-intensive models become more common, pipeline parallelism has grown increasingly important for several key areas: - Executing large-scale training jobs. - Enhancing performance in bandwidth-limited clusters. - Supporting large model inference. In this talk, we will introduce the `torch.distributed.pipelining` package which provides users a seamless way of applying pipeline parallelism. We will demonstrate the following features: - Splitting of model code based on simple specification. - Support for pipeline schedules, including GPipe, 1F1B, Interleaved 1F1B and Looped BFS, and providing the infrastructure for writing customized schedules. - Composability with other PyTorch parallel techniques such as data parallel (DDP, FSDP) or tensor parallel. - Out of the box integration with Hugging Face models for efficient inference.", + "duration": 765, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Howard Huang", + "Ke Wen" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/YieoyQll4_4/maxresdefault.webp", + "title": "Introduction to Torch.Distributed.Pipelining", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=YieoyQll4_4" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-llms-on-edge-with-ai-accelerators-chen-lai-kimish-patel-cemal-bilgin-meta.json b/pytorchconf-2024/videos/lightning-talk-llms-on-edge-with-ai-accelerators-chen-lai-kimish-patel-cemal-bilgin-meta.json new file mode 100644 index 000000000..5ff60315d --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-llms-on-edge-with-ai-accelerators-chen-lai-kimish-patel-cemal-bilgin-meta.json @@ -0,0 +1,28 @@ +{ + "description": "LLMs are known to be compute-heavy and consume lots of resources (almost all resources on phones), including memory and power.
A natural thought is to leverage the AI hardware accelerators, for example, Apple Neural Engine (ANE) on Apple devices and HTP on Qualcomm SoCs, to make them run fast and efficiently. Only by optimizing the model latency, memory consumption and power usage to a certain level will users be interested in installing the models on their devices. In this session, we\u2019d like to introduce how we leverage these AI accelerators within the PyTorch ecosystem to achieve state-of-the-art performance for Llama 3 on device, via ExecuTorch and the partnership with Apple and Qualcomm. Hardware companies usually have their own AI accelerators, and these likely have different characteristics: one may support a different set of operators than others, and one may only support static shapes (like HTP). However, transformer-based optimization can be generic. We\u2019ll discuss in more detail how we apply the generic optimization as well as the backend-specific optimization. The techniques we applied here are not just for LLMs, but can be applied to other transformer-based models.", + "duration": 729, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Chen Lai", + "Kimish Patel", + "Cemal Bilgin" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/8p8Pntnr3GU/maxresdefault.webp", + "title": "LLMs on Edge with AI Accelerators", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=8p8Pntnr3GU" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-low-precision-dtypes-in-pytorch-vasiliy-kuznetsov-meta.json b/pytorchconf-2024/videos/lightning-talk-low-precision-dtypes-in-pytorch-vasiliy-kuznetsov-meta.json new file mode 100644 index 000000000..6379c868b --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-low-precision-dtypes-in-pytorch-vasiliy-kuznetsov-meta.json @@ -0,0 +1,26 @@ +{ + "description": "This talk deep dives into the new native PyTorch float8 training library, and previews PyTorch's strategy for supporting upcoming low precision dtypes such as float6, float4 and MX for efficient training and inference.", + "duration": 530, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Vasiliy Kuznetsov" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/xcKwEZ77Cps/maxresdefault.webp", + "title": "Low Precision Dtypes in PyTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=xcKwEZ77Cps" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-making-the-most-of-heterogeneous-memory-capacity-using-pytorch-syed-ahmed-nvidia.json b/pytorchconf-2024/videos/lightning-talk-making-the-most-of-heterogeneous-memory-capacity-using-pytorch-syed-ahmed-nvidia.json new file mode 100644 index 000000000..86afbddeb --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-making-the-most-of-heterogeneous-memory-capacity-using-pytorch-syed-ahmed-nvidia.json @@ -0,0 +1,26 @@ +{ + "description": "Memory-intensive deep learning workloads require efficient use of all kinds of memories that are available in a system. In this session, we will discuss how we can utilize such heterogeneous memory through memory pools in PyTorch.
We will show how to mix and match different CUDA system allocators in the same PyTorch program using memory pools. Consequently, this API unlocks new use cases such as Extended GPU Memory (EGM) based all-gathers, Unified Virtual Memory (UVM), and NVLink Sharp (NVLS) reductions. New NVIDIA architectures accelerate such use cases with high-bandwidth and low-latency interconnects in the hardware, driven by extended functionality of CUDA system allocators in the software. Learn how to use these techniques on memory-intensive deep learning models like LLMs, and discover new CUDA features powered by PyTorch.", + "duration": 686, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Syed Ahmed" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/srQOK1UusZ4/maxresdefault.webp", + "title": "Making the Most of Heterogeneous Memory Capacity Using PyTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=srQOK1UusZ4" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-mobile-computational-photography-with-pytorch-low-light-denoising-alexis-baudron.json b/pytorchconf-2024/videos/lightning-talk-mobile-computational-photography-with-pytorch-low-light-denoising-alexis-baudron.json new file mode 100644 index 000000000..918bfd293 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-mobile-computational-photography-with-pytorch-low-light-denoising-alexis-baudron.json @@ -0,0 +1,26 @@ +{ + "description": "Over the last decade, smartphone cameras have improved significantly, becoming the primary device people use for capturing everyday moments and high-quality photographs. This progress is largely due to advances in computational photography and novel image sensors. Computational photography enables great images from compact mobile cameras, enhancing photos through various techniques such as multi-shot merging. Despite these advancements, challenges such as noise, artifacts, and distortions persist, especially in low-light conditions where limited light increases noise levels. In this lightning talk, we will explore how PyTorch can be used to design and optimize deep learning networks for real-time low-light denoising. We will dive into noise modeling, data generation, physics-aware models, and advanced network architectures for effective denoising in challenging low-light scenarios.
Attendees will gain practical insights into the latest advancements in mobile computational photography using PyTorch.", + "duration": 507, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Alexis Baudron" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/hjOtsOqPw3Y/maxresdefault.webp", + "title": "Mobile Computational Photography with PyTorch: Low-Light Denoising", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=hjOtsOqPw3Y" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-new-activation-checkpointing-apis-in-pytorch-jeffrey-wan-horace-he-meta.json b/pytorchconf-2024/videos/lightning-talk-new-activation-checkpointing-apis-in-pytorch-jeffrey-wan-horace-he-meta.json new file mode 100644 index 000000000..7e6ecd65b --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-new-activation-checkpointing-apis-in-pytorch-jeffrey-wan-horace-he-meta.json @@ -0,0 +1,27 @@ +{ + "description": "Activation checkpointing is a commonly used technique to reduce memory usage during model training by reducing the number of activations saved for backward. Instead of keeping tensors needed for backward alive until they are used in gradient computation during backward, those tensors are recomputed during the backward pass. This talk will introduce new activation checkpoint APIs that can help achieve a better trade-off between memory savings and the compute overhead that recomputing introduces.", + "duration": 750, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Jeffrey Wan", + "Horace He" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/v3gsrJtGLiA/maxresdefault.webp", + "title": "New Activation Checkpointing APIs in PyTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=v3gsrJtGLiA" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-on-device-profiling-and-debugging-with-executorch-olivia-liu-vaun-puri-meta.json b/pytorchconf-2024/videos/lightning-talk-on-device-profiling-and-debugging-with-executorch-olivia-liu-vaun-puri-meta.json new file mode 100644 index 000000000..ca5c3d02c --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-on-device-profiling-and-debugging-with-executorch-olivia-liu-vaun-puri-meta.json @@ -0,0 +1,27 @@ +{ + "description": "High developer velocity is crucial to shipping new ML-enabled experiences from a server-trained model to a customer\u2019s device. ExecuTorch is an on-device runtime that seamlessly integrates with the PyTorch stack with a focus on developer productivity. We present the ExecuTorch Dev Tools and highlight key features that tighten the iteration loop when optimizing models for deployment and execution on edge devices. We demonstrate how ExecuTorch\u2019s built-in profiler and bundled tools tackle key pain points, such as: 1. Examining the memory footprint of an ExecuTorch program ahead-of-time; 2. Collecting runtime performance metrics and intermediate outputs for accuracy analysis; 3.
Correlating runtime data with the underlying graph of an exported model.", + "duration": 852, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Olivia Liu", + "Vaun Puri" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/_e6E6fnEIDQ/maxresdefault.webp", + "title": "On-Device Profiling and Debugging with ExecuTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=_e6E6fnEIDQ" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-optimized-pytorch-inference-on-aarch64-linux-cpus-sunita-nadampalli-amazon-aws.json b/pytorchconf-2024/videos/lightning-talk-optimized-pytorch-inference-on-aarch64-linux-cpus-sunita-nadampalli-amazon-aws.json new file mode 100644 index 000000000..5dd70a8e1 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-optimized-pytorch-inference-on-aarch64-linux-cpus-sunita-nadampalli-amazon-aws.json @@ -0,0 +1,26 @@ +{ + "description": "In the last 2 years we've optimized the performance of PyTorch on Arm processors. The optimizations have included changes to ATen, C10, MKLDNN operators, the GEMM backend, and TorchInductor. In many cases, instead of writing our own kernels, we integrated the Arm Compute Library, used fastmath kernels with format types like bf16, implemented operator caching, and selected the optimal backend based on the input context. Through these optimizations we improved performance by over 2x. In this presentation we will first talk about how we went about this process, what those optimizations are, performance numbers for AWS Graviton3 processors for around 75 models, and CI/CD workflow details. Next, we will walk through a sample PyTorch application showing basic usage, how to tune the runtime, and the resulting speedup. At the end of the presentation attendees will learn about PyTorch performance optimizations on Arm processors, how to use them, and the areas where they can collaborate to further improve PyTorch for aarch64 CPUs.", + "duration": 790, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Sunita Nadampalli" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/_9xohmSa2G8/maxresdefault.webp", + "title": "Optimized PyTorch Inference on aarch64 Linux CPUs", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=_9xohmSa2G8" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-pytorch-release-process-andrey-talman-meta.json b/pytorchconf-2024/videos/lightning-talk-pytorch-release-process-andrey-talman-meta.json new file mode 100644 index 000000000..e0e730184 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-pytorch-release-process-andrey-talman-meta.json @@ -0,0 +1,26 @@ +{ + "description": "I would like to present and quickly discuss the PyTorch release process: how it happens and what the milestones are.
I will also cover what our cherry-picking criteria are and how we validate the release.", + "duration": 573, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Andrey Talman" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/4REnTJO3_ps/maxresdefault.webp", + "title": "PyTorch Release Process", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=4REnTJO3_ps" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-pytorch-xla-auto-sharding-yeounoh-chung-google.json b/pytorchconf-2024/videos/lightning-talk-pytorch-xla-auto-sharding-yeounoh-chung-google.json new file mode 100644 index 000000000..d195c05f4 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-pytorch-xla-auto-sharding-yeounoh-chung-google.json @@ -0,0 +1,26 @@ +{ + "description": "PyTorch/XLA recently launched the new PyTorch/XLA SPMD feature as a first step toward automating ML workload parallelization using GSPMD. It turns out that the performance largely depends on the quality of the sharding hints provided by the user \u2013 and it requires a correct and deep understanding of model architectures and much expertise to come up with optimal sharding hints. To address this problem, we propose to integrate PyTorch/XLA SPMD with XLA's auto sharding service, which allows the XLA compiler to shard and optimize the whole model without any user input.", + "duration": 461, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Yeounoh Chung" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/gNfyS9asA7A/maxresdefault.webp", + "title": "PyTorch/XLA Auto-Sharding", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=gNfyS9asA7A" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-sparsifying-vision-transformers-with-minimal-accuracy-loss-jesse-cai-meta.json b/pytorchconf-2024/videos/lightning-talk-sparsifying-vision-transformers-with-minimal-accuracy-loss-jesse-cai-meta.json new file mode 100644 index 000000000..4eb22749f --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-sparsifying-vision-transformers-with-minimal-accuracy-loss-jesse-cai-meta.json @@ -0,0 +1,26 @@ +{ + "description": "Sparsity, like quantization, is an approximate model optimization technique, where we trade some model accuracy for increased performance.\n\nIn this talk we'll explore how to minimize the accuracy degradation of sparsifying Vision Transformer (ViT) based models to GPU-accelerable sparsity patterns like block sparsity and semi-structured sparsity.\n\nWe'll cover the best techniques to ensure a less-than-5% loss in accuracy when:\n- training a sparse model from scratch\n- pruning and retraining an existing dense model\n- zero-shot/one-shot pruning a dense model\n\nWe've collected these techniques into a single repository, torchao, so that model optimization enthusiasts like you can sparsify your models with just a few lines of code.", + "duration": 841, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Jesse Cai" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url":
"https://i.ytimg.com/vi_webp/mqoIfs5hTxA/maxresdefault.webp", + "title": "Sparsifying Vision Transformers with Minimal Accuracy Loss", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=mqoIfs5hTxA" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-understanding-and-optimizing-pytorch-models-with-thunder-luca-antiga-lightning-ai.json b/pytorchconf-2024/videos/lightning-talk-understanding-and-optimizing-pytorch-models-with-thunder-luca-antiga-lightning-ai.json new file mode 100644 index 000000000..15ac386ee --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-understanding-and-optimizing-pytorch-models-with-thunder-luca-antiga-lightning-ai.json @@ -0,0 +1,26 @@ +{ + "description": "A hallmark feature of PyTorch is the natural expression of computation. This enables practitioners to implement AI models with ease. However, it prompts the question how to optimize the workload for a given hardware setup because those optimizations clutter our code and are tricky to combine. Lightning Thunder provides a Python-to-Python compiler to scale and optimize PyTorch programs that focuses on usability, understandability, and extensibility. A key tool in delivering on these goals is the composability of transformations: without changing the user code, we can stack quantization, distributing the computation across multiple GPUs, dispatching to optimized kernels, offloading, and other pluggable optimizations. Lightning Thunder flourishes in the PyTorch ecosystem: with PyTorch eager and with executors like torch.compile and nvFuser. It also dispatches to libraries like cuDNN, TransformerEngine, Apex, OpenAI Triton. The ability to apply multiple optimizations just-in-time leads to significant compounded speed-ups over unoptimized code out of the box. 
Luca will discuss the design of Thunder and demonstrate applications on training and inference for large language and multimodal models.", + "duration": 837, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Luca Antiga" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/5dQ423cFfLc/maxresdefault.webp", + "title": "Understanding and Optimizing PyTorch Models with Thunder", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=5dQ423cFfLc" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-what-s-new-for-pytorch-developer-infrastructure-sahan-paliskara-catherine-lee.json b/pytorchconf-2024/videos/lightning-talk-what-s-new-for-pytorch-developer-infrastructure-sahan-paliskara-catherine-lee.json new file mode 100644 index 000000000..95e1dc968 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-what-s-new-for-pytorch-developer-infrastructure-sahan-paliskara-catherine-lee.json @@ -0,0 +1,27 @@ +{ + "description": "A chat about all of the work being done to continue supporting PyTorch's Developer Infrastructure needs, including updates around Target Determination, Releases, and OSS Tooling.", + "duration": 671, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Sahan Paliskara", + "Catherine Lee" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/xlhBDySu7jM/maxresdefault.webp", + "title": "What's New for PyTorch Developer Infrastructure", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=xlhBDySu7jM" + } + ] +} diff --git a/pytorchconf-2024/videos/lightning-talk-whats-new-in-ex-angela-yi-tugsbayasgalan-manlaibaatar-avik-chaudhuri-yidi-wu.json b/pytorchconf-2024/videos/lightning-talk-whats-new-in-ex-angela-yi-tugsbayasgalan-manlaibaatar-avik-chaudhuri-yidi-wu.json new file mode 100644 index 000000000..ab652a0c0 --- /dev/null +++ b/pytorchconf-2024/videos/lightning-talk-whats-new-in-ex-angela-yi-tugsbayasgalan-manlaibaatar-avik-chaudhuri-yidi-wu.json @@ -0,0 +1,29 @@ +{ + "description": "This talk discusses updates we've made to torch.export this past year: (a) Non-strict mode, an alternative tracing mode which in practice covers more programs than TorchDynamo without compromising important soundness guarantees (b) Better dynamic shapes specifications through generating suggested fixes and runtime assertions (c) Control flow operators such as cond, map, and associative scan (d) A shift in the export generated IR, which will enable both training and inference (e) An unflattener, which will reconstruct the eager module structure from the flattened exported graph", + "duration": 814, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Angela Yi", + "Tugsbayasgalan Manlaibaatar", + "Avik Chaudhuri", + "Yidi Wu" + ], + "tags": [ + "Lightning Talk" + ], + "thumbnail_url": "https://i.ytimg.com/vi_webp/ap1TOf7p0iA/maxresdefault.webp", + "title": "What\u2019s New in Export?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ap1TOf7p0iA" + } + ] +} diff --git
a/pytorchconf-2024/videos/maximizing-training-throughput-using-torch-compile-and-fsdp-l-chu-a-viros-i-martin-b-vaughan.json b/pytorchconf-2024/videos/maximizing-training-throughput-using-torch-compile-and-fsdp-l-chu-a-viros-i-martin-b-vaughan.json new file mode 100644 index 000000000..cc00fc031 --- /dev/null +++ b/pytorchconf-2024/videos/maximizing-training-throughput-using-torch-compile-and-fsdp-l-chu-a-viros-i-martin-b-vaughan.json @@ -0,0 +1,26 @@ +{ + "description": "torch.compile is a graph compilation technique that improves GPU utilization. A key challenge in getting torch.compile to perform well is to minimize (or eliminate) graph breaks; however, this isn't trivial, as even the Llama implementation provided by Meta has many graph breaks, resulting in reduced training throughput. In this talk we discuss: 1. how we addressed these challenges in order to train a model using torch.compile; 2. how we combined torch.compile with FSDP and selective activation checkpointing to achieve the maximum throughput for training; 3. a model quality comparison between models trained with compile and no-compile; and lastly 4. the best setup we have for different model sizes in the Llama family that achieves the maximum throughput and MFU number (e.g. 68% MFU for the 7B model on A100 GPUs!)", + "duration": 220, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Linsong Chu", + "Antoni Viros i Martin", + "Brian Vaughan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/_CuLeABf_fM/maxresdefault.webp", + "title": "Maximizing Training Throughput Using Torch.Compile and FSDP", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=_CuLeABf_fM" + } + ] +} diff --git a/pytorchconf-2024/videos/meta-llama-3-and-the-future-of-responsible-ai-development-spencer-whitman-vincent-gonguet-meta.json b/pytorchconf-2024/videos/meta-llama-3-and-the-future-of-responsible-ai-development-spencer-whitman-vincent-gonguet-meta.json new file mode 100644 index 000000000..82eb196ac --- /dev/null +++ b/pytorchconf-2024/videos/meta-llama-3-and-the-future-of-responsible-ai-development-spencer-whitman-vincent-gonguet-meta.json @@ -0,0 +1,25 @@ +{ + "description": "As AI models become increasingly powerful and pervasive, trust and safety have become top priorities. Join us for a timely talk on Llama 3, our latest foundation model, and the cutting-edge trust and safety models and tools we've developed to ensure responsible AI development. In this talk, we'll dive into: \u2022 The advancements of Llama 3 and its applications \u2022 Our innovative trust and safety approaches, including toxicity detection and mitigation \u2022 The open-source tools and resources we're sharing to empower the community Discover how Meta is pushing the boundaries of trust and safety and learn how you can integrate these solutions into your own projects.
Let's build a safer, more responsible AI future together!", + "duration": 1251, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Spencer Whitman", + "Vincent Gonguet" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/XOIuFIl2-Ao/maxresdefault.webp", + "title": "Meta Llama 3 and the Future of Responsible AI Development", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=XOIuFIl2-Ao" + } + ] +} diff --git a/pytorchconf-2024/videos/mlir-enabling-composition-of-kernels-and-compilers-jacques-pienaar-google.json b/pytorchconf-2024/videos/mlir-enabling-composition-of-kernels-and-compilers-jacques-pienaar-google.json new file mode 100644 index 000000000..b214dbc96 --- /dev/null +++ b/pytorchconf-2024/videos/mlir-enabling-composition-of-kernels-and-compilers-jacques-pienaar-google.json @@ -0,0 +1,24 @@ +{ + "description": "Hand-written kernels and compilers have both been part of the toolbox for providing efficient and broad coverage. These approaches have often been positioned as being at odds with one another - and indeed the software solutions on either side have sometimes made it so. MLIR, since inception, aimed to enable general, beneficial composition instead: rather than treating kernels as a black-box escape hatch, it treats them as peers in solving serving needs. This is not magic and requires consideration of how best to combine the two. In this talk I'll present the approach and its effect in both IREE and OpenXLA.", + "duration": 672, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Jacques Pienaar" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/Dx1fAE9fk8s/maxresdefault.webp", + "title": "[MLIR] Enabling Composition of Kernels and Compilers", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Dx1fAE9fk8s" + } + ] +} diff --git a/pytorchconf-2024/videos/mojo-lifting-pt-to-new-heights-with-max-and-mojo-mikhail-zolotukhin-modular.json b/pytorchconf-2024/videos/mojo-lifting-pt-to-new-heights-with-max-and-mojo-mikhail-zolotukhin-modular.json new file mode 100644 index 000000000..caac5fc1d --- /dev/null +++ b/pytorchconf-2024/videos/mojo-lifting-pt-to-new-heights-with-max-and-mojo-mikhail-zolotukhin-modular.json @@ -0,0 +1,24 @@ +{ + "description": "In this talk we'll peek into Modular's inference engine: how it builds on and works with PyTorch and what is unique about it. We will look into how the Mojo language can be used to define performant kernels and what optimizations the inference engine can perform.
We will also talk briefly about our experience developing a third-party backend for torch.compile.", + "duration": 572, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Mikhail Zolotukhin" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/JmHKhc6EGpg/maxresdefault.webp", + "title": "[MOJO] Lifting PT to New Heights with MAX and Mojo", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=JmHKhc6EGpg" + } + ] +} diff --git a/pytorchconf-2024/videos/panel-discussion-t-dettmers-h-schoelkopf-a-chowdhery-a-conneau-moderated-by-k-khandelwal.json b/pytorchconf-2024/videos/panel-discussion-t-dettmers-h-schoelkopf-a-chowdhery-a-conneau-moderated-by-k-khandelwal.json new file mode 100644 index 000000000..96759d049 --- /dev/null +++ b/pytorchconf-2024/videos/panel-discussion-t-dettmers-h-schoelkopf-a-chowdhery-a-conneau-moderated-by-k-khandelwal.json @@ -0,0 +1,27 @@ +{ + "description": "Speakers: Tim Dettmers, Hailey Schoelkopf, Aakanksha Chowdhery, Alexis Conneau\nAs open-source LLMs have become more capable, a substantial ecosystem has developed around the fine-tuning of these models. A thriving community of researchers, developers, practitioners and hobbyists has emerged which focuses on topics ranging from memory efficiency, parameter-efficient fine-tuning and quantization to performance at scale and reproducible evaluations. The goal of this mini-summit is to bring this community together to discuss ideas, share knowledge and build connections.\n\nThe agenda features a keynote from Joe Spisak on the state of the Llama ecosystem followed by invited talks from the founders of Axolotl, Unsloth and torchtune. We conclude the summit with a riveting discussion on what\u2019s next for LLMs, fine-tuning and the PyTorch ecosystem with a fabulous panel of experts - Tim Dettmers (author of bitsandbytes and QLoRA), Hailey Schoelkopf (maintainer of LM Eval Harness at EleutherAI), Aakanksha Chowdhery (Lead author on PaLM and Gemini) and Alexis Conneau (Research Lead at OpenAI)", + "duration": 2160, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Tim Dettmers", + "Hailey Schoelkopf", + "Aakanksha Chowdhery", + "Alexis Conneau" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/1VwOB7OYm4U/maxresdefault.webp", + "title": "Panel Discussion", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=1VwOB7OYm4U" + } + ] +} diff --git a/pytorchconf-2024/videos/pushing-the-performance-envelope-an-optimization-study-for-3-suvaditya-mukherjee-shireen-chand.json b/pytorchconf-2024/videos/pushing-the-performance-envelope-an-optimization-study-for-3-suvaditya-mukherjee-shireen-chand.json new file mode 100644 index 000000000..903353e34 --- /dev/null +++ b/pytorchconf-2024/videos/pushing-the-performance-envelope-an-optimization-study-for-3-suvaditya-mukherjee-shireen-chand.json @@ -0,0 +1,25 @@ +{ + "description": "This work explores performance optimization strategies for training 3D generative models using PyTorch. We focus on training Variational Autoencoders (VAEs) on the ShapeNet dataset, a popular benchmark for this task. Our objective is to achieve high-fidelity reconstructions while minimizing the computational footprint and training time.
Specifically, we cover: 1) large-scale 3D dataset loading strategies using PyTorch & Google Cloud Storage Buckets; 2) implementation details and insights for 3D VAEs using PyTorch 2.x; 3) training using automatic mixed-precision regimes; 4) optimized training using torch.compile and different quantization techniques (as supported) - dynamic quantization, static quantization, and static quantization-aware training; and 5) a comparative benchmark over several experiments performed with a focus on execution time and memory footprint. Through this comprehensive study, we present a comparative analysis of the performance gains achieved by our optimized models. Our findings present empirical insights into the trade-offs between model accuracy, computational complexity, and hardware resource utilization.", + "duration": 1153, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Suvaditya Mukherjee", + "Shireen Chand" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/Xm9Bex3LE_8/maxresdefault.webp", + "title": "Pushing the Performance Envelope: An Optimization Study for 3D Generative Modelling with PyTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Xm9Bex3LE_8" + } + ] +} diff --git a/pytorchconf-2024/videos/running-state-of-art-gen-ai-models-on-device-with-npu-acceleration-felix-baum-qualcomm.json b/pytorchconf-2024/videos/running-state-of-art-gen-ai-models-on-device-with-npu-acceleration-felix-baum-qualcomm.json new file mode 100644 index 000000000..43201a0a4 --- /dev/null +++ b/pytorchconf-2024/videos/running-state-of-art-gen-ai-models-on-device-with-npu-acceleration-felix-baum-qualcomm.json @@ -0,0 +1,24 @@ +{ + "description": "Since the boom of generative AI, the industry has been moving towards on-device AI inferencing, which is not only a trend but a necessity in order to save costs and achieve the best inference performance and ultra-low latency at the lowest power possible. In this session we go over the new features added to the Qualcomm AI Stack and how it works with the public release of ExecuTorch 1.0. We will discuss how to run traditional workloads as well as GenAI use cases, including the latest version of Llama, on mobile devices using the Qualcomm Hexagon NPU.", + "duration": 1460, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Felix Baum" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/wd57g2IM3C4/maxresdefault.webp", + "title": "Running State-of-Art Gen AI Models on-Device with NPU Acceleration", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=wd57g2IM3C4" + } + ] +} diff --git a/pytorchconf-2024/videos/slaying-ooms-mark-saroufim-jane-xu-meta.json b/pytorchconf-2024/videos/slaying-ooms-mark-saroufim-jane-xu-meta.json new file mode 100644 index 000000000..2de2ee0df --- /dev/null +++ b/pytorchconf-2024/videos/slaying-ooms-mark-saroufim-jane-xu-meta.json @@ -0,0 +1,25 @@ +{ + "description": "Have you ever hit an OOM (and wished you had more VRAM)? Who hasn't! Hop on the bus with us and feel the road become smoother as we talk about stacking together techniques like FSDP2 + QLoRA + CPU Offloading + Fused ADAM (thanks Intel) + more in PyTorch native.
We will give an overview of these techniques as well as the hard edges we solved in their composition. Curious for more? Or...still OOMing? We also plan on discussing our more researchy work on offloading, pagedness, and low-precision optimizers.", + "duration": 1541, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Mark Saroufim", + "Jane Xu" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/xzBcBJ8_rzM/maxresdefault.webp", + "title": "Slaying OOMs", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=xzBcBJ8_rzM" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-keynote-accelerating-ai-how-amd-and-pytorch-drive-innovation-with-sea-anush-elangovan.json b/pytorchconf-2024/videos/sponsored-keynote-accelerating-ai-how-amd-and-pytorch-drive-innovation-with-sea-anush-elangovan.json new file mode 100644 index 000000000..bc11cff13 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-keynote-accelerating-ai-how-amd-and-pytorch-drive-innovation-with-sea-anush-elangovan.json @@ -0,0 +1,24 @@ +{ + "description": "In this keynote presentation, we explore the robust collaboration between AMD and PyTorch that is propelling advancements in artificial intelligence and machine learning. Discover how AMD's commitment to Day-0 PyTorch support ensures that PyTorch users benefit from cutting-edge performance enhancements and out-of-the-box compatibility. We delve into the technical synergies that make AMD hardware an ideal choice for PyTorch frameworks, showcasing real-world examples of accelerated workflows and breakthrough AI applications. Join us to learn how this dynamic partnership is enabling researchers, developers, and data scientists to push the boundaries of innovation and achieve unprecedented results in their AI projects.", + "duration": 343, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Anush Elangovan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/paR73OPcdNw/maxresdefault.webp", + "title": "Accelerating AI: How AMD and PyTorch Drive Innovation with Seamless Day-0 Support and High Performance", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=paR73OPcdNw" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-keynote-enabling-ai-everywhere-with-pytorch-and-intel-kismat-singh-intel.json b/pytorchconf-2024/videos/sponsored-keynote-enabling-ai-everywhere-with-pytorch-and-intel-kismat-singh-intel.json new file mode 100644 index 000000000..af0f634b8 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-keynote-enabling-ai-everywhere-with-pytorch-and-intel-kismat-singh-intel.json @@ -0,0 +1,24 @@ +{ + "description": "Unlocking the availability of and access to generative AI technologies has great societal value. In this keynote, Kismat Singh will present how open software built on industry-standard frameworks such as PyTorch, together with ubiquitous hardware from Intel that forms a large part of the current installed base across edge, PC and cloud, are key to democratizing AI and allowing new solutions to be implemented across industries including healthcare, telecommunications, industrial and more.
Kismat will share his thoughts on how software acceleration, flexibility and security are important factors in deploying AI applications in production, and what he sees as the challenges with those projects. He will also discuss Open Platform for Enterprise AI (OPEA), a new Linux Foundation AI and Data project that gives developers access to open-source, standardized, modular, and heterogeneous retrieval-augmented generation (RAG) pipelines that they can use for their enterprise-grade Generative AI deployments. Lastly, he will share some exciting Intel-contributed features recently upstreamed into PyTorch. He will end the keynote by stating what he believes to be the future of AI and the part each of us will play in it!", + "duration": 358, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Kismat Singh" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/tGEtpXaoXuk/maxresdefault.webp", + "title": "Enabling AI Everywhere with PyTorch and Intel", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=tGEtpXaoXuk" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-keynote-from-containers-to-cognition-conducting-the-ai-orchestra-taylor-dolezal.json b/pytorchconf-2024/videos/sponsored-keynote-from-containers-to-cognition-conducting-the-ai-orchestra-taylor-dolezal.json new file mode 100644 index 000000000..27a04e867 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-keynote-from-containers-to-cognition-conducting-the-ai-orchestra-taylor-dolezal.json @@ -0,0 +1,24 @@ +{ + "description": "Let's explore the powerful harmony created when the CNCF and PyTorch communities join forces. This keynote highlights how the collaboration between cloud native experts and AI innovators is orchestrating a new era of technological symphonies. We'll touch on critical initiatives and shared victories that demonstrate the strength of this partnership. To illustrate the creative potential of this alliance, we'll briefly showcase a demo of how containerized workloads can produce unexpected melodies. Join us for this exploration of community-driven innovation, where containers and cognition come together to compose the future of technology.", + "duration": 306, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Taylor Dolezal" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/MWmOeXI17Kg/maxresdefault.webp", + "title": "From Containers to Cognition: Conducting the AI Orchestra", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=MWmOeXI17Kg" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-keynote-optimizing-ai-inference-for-large-language-models-mudhakar-srivatsa-ibm.json b/pytorchconf-2024/videos/sponsored-keynote-optimizing-ai-inference-for-large-language-models-mudhakar-srivatsa-ibm.json new file mode 100644 index 000000000..ed2eb11d4 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-keynote-optimizing-ai-inference-for-large-language-models-mudhakar-srivatsa-ibm.json @@ -0,0 +1,24 @@ +{ + "description": "This talk will cover two new ways IBM has optimized generative AI inferencing with PyTorch: speculative decoding and Triton kernel development.
Speculative decoding leverages predictive modeling to reduce latency by anticipating potential outputs, streamlining the inference process without sacrificing accuracy. IBM Research's team developed new speculative architectures and open-sourced speculators for Llama 3 models. The talk will also discuss various Triton kernels to accelerate inference, one of which was contributed to vLLM for accelerating MoE models. Finally, it will share a glimpse of IBM's AI hardware work, including how the IBM Artificial Intelligence Unit (AIU) could integrate into the PyTorch stack.", + "duration": 349, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Mudhakar Srivatsa" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/ZpNyKs4EpVI/maxresdefault.webp", + "title": "Optimizing AI Inference for Large Language Models", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ZpNyKs4EpVI" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-keynote-the-lightning-ai-oss-stack-for-accelerating-the-ai-lifecycle-luca-antiga.json b/pytorchconf-2024/videos/sponsored-keynote-the-lightning-ai-oss-stack-for-accelerating-the-ai-lifecycle-luca-antiga.json new file mode 100644 index 000000000..892cda6cd --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-keynote-the-lightning-ai-oss-stack-for-accelerating-the-ai-lifecycle-luca-antiga.json @@ -0,0 +1,24 @@ +{ + "description": "We introduce the Lightning AI open source stack, a high-performance stack for training, fine-tuning, and deploying AI systems that augments the PyTorch ecosystem.\n\nToday PyTorch Lightning powers training workloads across the industry, from small-scale research to large-scale training endeavors. The package reached 130M total downloads in June 2024, 2x its count from early 2023. PyTorch Lightning 2.4 features support for 2D parallelism via DTensors, first introduced in PyTorch 2.3.\n\nThe open source stack is completed by Fabric (lightweight building blocks for scaling training workloads), LitGPT (library for pre-training, fine-tuning, serving LLMs), LitData (parallel data processing and streaming data loading), LitServe (lightweight, high-performance serving framework), TorchMetrics (de-facto standard in deep learning metrics), and the recently released Thunder compiler.
Together, these packages provide a low-friction, high-performance stack to democratize and accelerate the AI lifecycle.\n\nThe stack is optimized to run on Lightning Studios, a PyTorch-native, fully integrated AI development environment on the cloud.", + "duration": 368, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Luca Antiga" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/jAtrviDGpwE/maxresdefault.webp", + "title": "The Lightning AI OSS Stack for Accelerating the AI Lifecycle", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=jAtrviDGpwE" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-session-accelerating-ai-innovation-high-performance-pyt-robert-suderman-ian-nordeng.json b/pytorchconf-2024/videos/sponsored-session-accelerating-ai-innovation-high-performance-pyt-robert-suderman-ian-nordeng.json new file mode 100644 index 000000000..b966d886c --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-session-accelerating-ai-innovation-high-performance-pyt-robert-suderman-ian-nordeng.json @@ -0,0 +1,25 @@ +{ + "description": "Explore the powerful collaboration between AMD and PyTorch, driving advancements in AI and machine learning. Learn how AMD\u2019s Day-0 PyTorch support delivers cutting-edge performance and seamless compatibility.\n\nThis session will highlight the technical synergies that make AMD hardware the ideal choice for PyTorch frameworks, with real-world examples of accelerated workflows and breakthrough AI applications. Attendees will gain insights into how this dynamic partnership is enabling researchers, developers, and data scientists to push the boundaries of innovation and achieve unprecedented results in AI projects.", + "duration": 1710, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Robert Suderman", + "Ian Nordeng" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/h0LG3JbA8kc/maxresdefault.webp", + "title": "Accelerating AI Innovation: High Performance PyTorch at AMD", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=h0LG3JbA8kc" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-session-democratizing-ai-powering-the-future-with-arms-global-comp-gian-marco-iodice.json b/pytorchconf-2024/videos/sponsored-session-democratizing-ai-powering-the-future-with-arms-global-comp-gian-marco-iodice.json new file mode 100644 index 000000000..59979cec8 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-session-democratizing-ai-powering-the-future-with-arms-global-comp-gian-marco-iodice.json @@ -0,0 +1,24 @@ +{ + "description": "Arm is excited to be at the center of the world's largest compute ecosystem at the dawn of the AI era. A key tenet of our mission is to democratize AI capabilities, empowering millions of developers to put advanced AI features into the hands of billions of users.\n\nIn this presentation, we'll explore how Arm is enabling the world\u2019s leading open-source AI frameworks to leverage power-efficient Arm-based computing platforms and Arm architecture features, as a foundation for fast and secure AI workloads.
The session focuses on how our strategic partnership with the PyTorch and ExecuTorch community is enabling a seamless and transparent developer experience for running workloads everywhere from cloud to edge. This session will highlight some of our optimized libraries, upstreamed contributions, and a wealth of AI-related developer material for building the future of AI on Arm.", + "duration": 1823, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Gian Marco Iodice" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/0D_SAurBA0s/maxresdefault.webp", + "title": "Democratizing AI: Powering the Future with Arm\u2019s Global Compute Ecosystem", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=0D_SAurBA0s" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-session-nemo-aligner-a-scalable-toolkit-for-model-alignment-gerald-shen-jimmy-zhang.json b/pytorchconf-2024/videos/sponsored-session-nemo-aligner-a-scalable-toolkit-for-model-alignment-gerald-shen-jimmy-zhang.json new file mode 100644 index 000000000..f825d1af2 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-session-nemo-aligner-a-scalable-toolkit-for-model-alignment-gerald-shen-jimmy-zhang.json @@ -0,0 +1,25 @@ +{ + "description": "Aligning AI models with human values and preferences is essential for making them safe and helpful. However, building an efficient and scalable toolkit for alignment can be challenging, especially when applied to state-of-the-art foundation models with billions or trillions of parameters. NeMo-Aligner is an open-source, optimized and scalable toolkit that implements alignment algorithms such as Reinforcement Learning from Human Feedback (RLHF), Direct Preference Optimization (DPO), SteerLM and Self-Play Fine Tuning (SPIN). This talk will introduce NeMo-Aligner and show the steps we took to design and optimize the toolkit around various alignment algorithms. In particular, we discuss the RLHF implementation, where we observe close to a 7x speedup and excellent scaling performance by adding TRT-LLM integration, carefully orchestrating communication and utilizing fast training kernels.
We\u2019re able to align state-of-the-art open-source models with NeMo-Aligner and hope our framework can enable the community to performantly customize, fine-tune and align foundation models at any scale.", + "duration": 1275, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Gerald Shen", + "Jimmy Zhang" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/rROnWTNF-_A/maxresdefault.webp", + "title": "NeMo-Aligner: A Scalable Toolkit for Model Alignment", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=rROnWTNF-_A" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-session-pytorch-support-by-google-enabling-perform-mark-sherwood-shauheen-zahirazami.json b/pytorchconf-2024/videos/sponsored-session-pytorch-support-by-google-enabling-perform-mark-sherwood-shauheen-zahirazami.json new file mode 100644 index 000000000..cc116b319 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-session-pytorch-support-by-google-enabling-perform-mark-sherwood-shauheen-zahirazami.json @@ -0,0 +1,25 @@ +{ + "description": "In this session we will cover various ways teams at Google are working to help the PyTorch community achieve performance and scale from cloud to edge. We will cover how Google Cloud customers can use PyTorch and OpenXLA to get competitive performance for their ML workloads. We\u2019ll also cover how Google AI Edge Torch works with PyTorch to help developers integrate LLMs, vision models and more to easily create new edge applications that can run on a wide set of devices.", + "duration": 1157, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Mark Sherwood", + "Shauheen Zahirazami" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/CzviSkYjEkQ/maxresdefault.webp", + "title": "PyTorch Support by Google Enabling Performance from Cloud to Edge", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=CzviSkYjEkQ" + } + ] +} diff --git a/pytorchconf-2024/videos/sponsored-session-torchchat-a-showcase-of-pytorch-llm-ubiquity-jack-khuu-jesse-white-meta.json b/pytorchconf-2024/videos/sponsored-session-torchchat-a-showcase-of-pytorch-llm-ubiquity-jack-khuu-jesse-white-meta.json new file mode 100644 index 000000000..949225214 --- /dev/null +++ b/pytorchconf-2024/videos/sponsored-session-torchchat-a-showcase-of-pytorch-llm-ubiquity-jack-khuu-jesse-white-meta.json @@ -0,0 +1,25 @@ +{ + "description": "This talk explores the journey of enabling LLMs in the PyTorch ecosystem, as well as how the teams behind AOT Inductor, ExecuTorch, and torchao collaborated to create torchchat, a showcase of PyTorch\u2019s ability to run LLM inference everywhere.\n\nTorchchat demonstrates the ubiquity, simplicity, and quality of PyTorch\u2019s LLM support through performant, reproducible implementations not only for Python environments, but also on desktop, server, and on-device.\n\nAll of our work is open source and available on GitHub.", + "duration": 1459, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Jack Khuu", + "Jesse White" + ], + "tags": [], + "thumbnail_url":
"https://i.ytimg.com/vi_webp/hSJ9oD6tbeU/maxresdefault.webp", + "title": "Torchchat: A Showcase of PyTorch LLM Ubiquity", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=hSJ9oD6tbeU" + } + ] +} diff --git a/pytorchconf-2024/videos/state-of-pytorch-ji-li-damien-sereni-meta.json b/pytorchconf-2024/videos/state-of-pytorch-ji-li-damien-sereni-meta.json new file mode 100644 index 000000000..bff4ba287 --- /dev/null +++ b/pytorchconf-2024/videos/state-of-pytorch-ji-li-damien-sereni-meta.json @@ -0,0 +1,25 @@ +{ + "description": "This talk gives a run through of who builds PyTorch, new and upcoming improvements to the framework and how to get involved. All thanks to our awesome community of contributors, partners and ecosystem tools.", + "duration": 1237, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Ji Li", + "Damien Sereni" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/ibgW_ali0Gc/maxresdefault.webp", + "title": "State of PyTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ibgW_ali0Gc" + } + ] +} diff --git a/pytorchconf-2024/videos/the-challenges-of-building-an-opinionated-open-source-llm-framework-wing-lian-axolotl-ai.json b/pytorchconf-2024/videos/the-challenges-of-building-an-opinionated-open-source-llm-framework-wing-lian-axolotl-ai.json new file mode 100644 index 000000000..eadf36b55 --- /dev/null +++ b/pytorchconf-2024/videos/the-challenges-of-building-an-opinionated-open-source-llm-framework-wing-lian-axolotl-ai.json @@ -0,0 +1,24 @@ +{ + "description": "As open-source LLMs have become more capable, a substantial ecosystem has developed around the fine-tuning of these models. A thriving community of researchers, developers, practitioners and hobbyists has emerged which focuses on topics ranging from memory efficiency, parameter-efficient fine-tuning and quantization to performance at scale and reproducible evaluations. The goal of this mini-summit is to bring this community together to discuss ideas, share knowledge and build connections.\n\nThe agenda features a keynote from Joe Spisak on the state of the Llama ecosystem followed by invited talks from the founders of Axolotl, Unsloth and torchtune. 
We conclude the summit with a riveting discussion on what\u2019s next for LLMs, fine-tuning and the PyTorch ecosystem with a fabulous panel of experts: Tim Dettmers (author of bitsandbytes and QLoRA), Hailey Schoelkopf (maintainer of LM Eval Harness at EleutherAI), Aakanksha Chowdhery (Lead author on PaLM and Gemini) and Alexis Conneau (Research Lead at OpenAI).", + "duration": 557, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Wing Lian" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/bp9xioJv0gw/maxresdefault.webp", + "title": "The Challenges of Building an Opinionated Open Source LLM Framework", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=bp9xioJv0gw" + } + ] +} diff --git a/pytorchconf-2024/videos/the-impact-and-challenges-of-open-source-generative-datasets-and-models-aaron-gokaslan.json b/pytorchconf-2024/videos/the-impact-and-challenges-of-open-source-generative-datasets-and-models-aaron-gokaslan.json new file mode 100644 index 000000000..324d40249 --- /dev/null +++ b/pytorchconf-2024/videos/the-impact-and-challenges-of-open-source-generative-datasets-and-models-aaron-gokaslan.json @@ -0,0 +1,24 @@ +{ + "description": "Open source generative models like OpenGPT2, BLOOM, and others have been pivotal in advancing AI technology. These models leverage extensive text data to achieve advanced linguistic capabilities. However, the trend towards proprietary tools and closed large language models is growing, posing unique challenges in open-source AI development. This discussion will explore the intricacies of training such models, the hurdles in dataset management, and the regulation of open-source contributions. We'll explore how to effectively iterate on collected data, prepare for extensive training sessions, and coordinate research across large open-source organizations. We will discuss the challenges of generative models in three different modalities: text, image, and genomics. The talk will draw from the speaker\u2019s personal experience working on OpenWebText, OpenGPT2, BLOOM, CommonCanvas, Caduceus, and other generative models.
We will also cover the changing AI environment and how the future of open source is threatened by onerous regulation, ever-increasing compute costs, and the commoditization of previously open data.", + "duration": 1815, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Aaron Gokaslan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/15ECoIeBf3c/maxresdefault.webp", + "title": "The Impact and Challenges of Open Source Generative Datasets and Models", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=15ECoIeBf3c" + } + ] +} diff --git a/pytorchconf-2024/videos/the-rise-of-transformers-in-the-growing-pytorch-ecosystem-arthur-zucker-hugging-face.json b/pytorchconf-2024/videos/the-rise-of-transformers-in-the-growing-pytorch-ecosystem-arthur-zucker-hugging-face.json new file mode 100644 index 000000000..69538970a --- /dev/null +++ b/pytorchconf-2024/videos/the-rise-of-transformers-in-the-growing-pytorch-ecosystem-arthur-zucker-hugging-face.json @@ -0,0 +1,24 @@ +{ + "description": "Explore how the `transformers` library grows and adapts to the fast-paced and ever-changing AI field to bring the best to the AI community.", + "duration": 1117, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Arthur Zucker" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/P-asaQVmA3o/maxresdefault.webp", + "title": "The Rise of `Transformers` in the Growing PyTorch Ecosystem", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=P-asaQVmA3o" + } + ] +} diff --git a/pytorchconf-2024/videos/the-state-of-the-llama-ecosystem-joe-spisak-meta.json b/pytorchconf-2024/videos/the-state-of-the-llama-ecosystem-joe-spisak-meta.json new file mode 100644 index 000000000..41cd96901 --- /dev/null +++ b/pytorchconf-2024/videos/the-state-of-the-llama-ecosystem-joe-spisak-meta.json @@ -0,0 +1,24 @@ +{ + "description": "As open-source LLMs have become more capable, a substantial ecosystem has developed around the fine-tuning of these models. A thriving community of researchers, developers, practitioners and hobbyists has emerged which focuses on topics ranging from memory efficiency, parameter-efficient fine-tuning and quantization to performance at scale and reproducible evaluations. The goal of this mini-summit is to bring this community together to discuss ideas, share knowledge and build connections.\n\nThe agenda features a keynote from Joe Spisak on the state of the Llama ecosystem followed by invited talks from the founders of Axolotl, Unsloth and torchtune.
We conclude the summit with a riveting discussion on what\u2019s next for LLMs, fine-tuning and the PyTorch ecosystem with a fabulous panel of experts: Tim Dettmers (author of bitsandbytes and QLoRA), Hailey Schoelkopf (maintainer of LM Eval Harness at EleutherAI), Aakanksha Chowdhery (Lead author on PaLM and Gemini) and Alexis Conneau (Research Lead at OpenAI).", + "duration": 1044, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Joe Spisak" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/9_gmwWCdAlU/maxresdefault.webp", + "title": "The State of the Llama Ecosystem", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=9_gmwWCdAlU" + } + ] +} diff --git a/pytorchconf-2024/videos/together-goes-brrr-threading-research-production-with-torch-compile-pragaash-ponnusamy.json b/pytorchconf-2024/videos/together-goes-brrr-threading-research-production-with-torch-compile-pragaash-ponnusamy.json new file mode 100644 index 000000000..b2e4d8841 --- /dev/null +++ b/pytorchconf-2024/videos/together-goes-brrr-threading-research-production-with-torch-compile-pragaash-ponnusamy.json @@ -0,0 +1,24 @@ +{ + "description": "The deployment of large language models for inference at scale is inherently complex, often requiring intricate optimizations across compute-bound and memory-bound regimes. This talk explores how PyTorch's torch.compile has revolutionized the optimization landscape for LLM serving at Together AI. Through its sophisticated Dynamo tracer and Inductor backend, torch.compile has transformed the approach to critical performance bottlenecks in both prefill and decode phases of inference. We examine how automatic vertical fusion, epilogue optimization, and adaptive kernel generation across batch sizes for GEMV and GEMM workloads address key efficiency concerns, from CUDA graph captures and optimized all-reduce strategies to custom kernel registrations. The presentation highlights Together AI's journey in leveraging torch.compile to streamline the transition from research to production, significantly simplifying the deployment process for even custom architectures. By automating many performance-critical optimizations, torch.compile has not only enhanced inference efficiency but also democratized high-performance LLM deployment.
We'll conclude by sharing key lessons learned and best practices gleaned from Together AI's experience in deploying torch.compile to production, serving billions of user queries and navigating the complexities of large-scale LLM inference.", + "duration": 566, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Pragaash Ponnusamy" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/zn0Pm2Pv3O0/maxresdefault.webp", + "title": "Together Goes Brrr: Threading Research & Production with Torch Compile", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=zn0Pm2Pv3O0" + } + ] +} diff --git a/pytorchconf-2024/videos/torch-compile-for-autograd-ddp-and-fsdp-will-feng-chien-chin-huang-simon-fan-meta.json b/pytorchconf-2024/videos/torch-compile-for-autograd-ddp-and-fsdp-will-feng-chien-chin-huang-simon-fan-meta.json new file mode 100644 index 000000000..225b221f8 --- /dev/null +++ b/pytorchconf-2024/videos/torch-compile-for-autograd-ddp-and-fsdp-will-feng-chien-chin-huang-simon-fan-meta.json @@ -0,0 +1,26 @@ +{ + "description": "In this talk, we will present the latest advancements in torch.compile for distributed training via DDP and FSDP. We will first introduce Compiled Autograd, a torch.compile mode to fully capture the backpropagation step, including the communication collective operators used in distributed training. We will then cover the improvements this new approach brought to Compiled DDP/FSDP, notably by removing DDP/FSDP graph breaks, which improves the potential for compute/communication overlap.", + "duration": 1343, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Will Feng", + "Chien-Chin Huang", + "Simon Fan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/1XuibaVRewc/maxresdefault.webp", + "title": "Torch.Compile for Autograd, DDP and FSDP", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=1XuibaVRewc" + } + ] +} diff --git a/pytorchconf-2024/videos/torchinductor-cpu-backend-advancements-new-features-and-performance-imp-jiong-gong-leslie-fang.json b/pytorchconf-2024/videos/torchinductor-cpu-backend-advancements-new-features-and-performance-imp-jiong-gong-leslie-fang.json new file mode 100644 index 000000000..d20caa4fc --- /dev/null +++ b/pytorchconf-2024/videos/torchinductor-cpu-backend-advancements-new-features-and-performance-imp-jiong-gong-leslie-fang.json @@ -0,0 +1,25 @@ +{ + "description": "This presentation provides an update on the latest advancements in the TorchInductor CPU backend since the last conference to bring best-in-class CPU performance for broad DL workloads. We will discuss new features and performance enhancements, including:\n\u2022 Max-autotune support with codegen for GEMMs, boosting performance for GEMM-related operations\n\u2022 Enhanced vectorized codegen support, now covering all data types beyond floating points with flexible vector factors, and optimized loop scheduling\n\u2022 Comprehensive quantization support, including weight-only quantization (WoQ), and optimizations for dynamic quantization and quantization-aware training\n\u2022 Improved attention support, featuring attention masks and optimizing SoftMax via flash attention v2, etc.
\u2022 AOTInductor support, enabling high-performance inference with frozen weights\n\u2022 Native Windows support, with improved vectorization capabilities\nThese advancements, combined with ongoing optimizations, have resulted in significant performance improvements since PyTorch 2.1, demonstrated through extensive benchmarks and large language models (LLMs).", + "duration": 1312, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Jiong Gong", + "Leslie Fang" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/VwmOgzXtxYw/maxresdefault.webp", + "title": "TorchInductor CPU Backend Advancements: New Features and Performance Improvements", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=VwmOgzXtxYw" + } + ] +} diff --git a/pytorchconf-2024/videos/torchtitan-large-scale-llm-training-using-native-pytorch-3d-parallel-wanchao-liang-linsong-chu.json b/pytorchconf-2024/videos/torchtitan-large-scale-llm-training-using-native-pytorch-3d-parallel-wanchao-liang-linsong-chu.json new file mode 100644 index 000000000..672f407e5 --- /dev/null +++ b/pytorchconf-2024/videos/torchtitan-large-scale-llm-training-using-native-pytorch-3d-parallel-wanchao-liang-linsong-chu.json @@ -0,0 +1,25 @@ +{ + "description": "torchtitan is a proof-of-concept for large-scale LLM training using native PyTorch. It is a repo that showcases PyTorch's latest distributed training features in a clean, minimal codebase. We showcased end-to-end enablement of large-scale training features:\n1. 3D/4D parallelism\n2. Efficient distributed checkpoint save/load/resharding\n3. Many efficient training techniques, including Float8, torch.compile, activation checkpointing, etc.", + "duration": 1460, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Wanchao Liang", + "Linsong Chu" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/WsNEBxPDljU/maxresdefault.webp", + "title": "Torchtitan: Large-Scale LLM Training Using Native PyTorch 3D Parallelism", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=WsNEBxPDljU" + } + ] +} diff --git a/pytorchconf-2024/videos/torchtune-easy-and-accessible-finetuning-in-native-pytorch-evan-smothers-meta.json b/pytorchconf-2024/videos/torchtune-easy-and-accessible-finetuning-in-native-pytorch-evan-smothers-meta.json new file mode 100644 index 000000000..01044c26c --- /dev/null +++ b/pytorchconf-2024/videos/torchtune-easy-and-accessible-finetuning-in-native-pytorch-evan-smothers-meta.json @@ -0,0 +1,24 @@ +{ + "description": "As open-source LLMs have become more capable, a substantial ecosystem has developed around the fine-tuning of these models. A thriving community of researchers, developers, practitioners and hobbyists has emerged which focuses on topics ranging from memory efficiency, parameter-efficient fine-tuning and quantization to performance at scale and reproducible evaluations. The goal of this mini-summit is to bring this community together to discuss ideas, share knowledge and build connections.\n\nThe agenda features a keynote from Joe Spisak on the state of the Llama ecosystem followed by invited talks from the founders of Axolotl, Unsloth and torchtune.
We conclude the summit with a riveting discussion on what\u2019s next for LLMs, fine-tuning and the PyTorch ecosystem with a fabulous panel of experts: Tim Dettmers (author of bitsandbytes and QLoRA), Hailey Schoelkopf (maintainer of LM Eval Harness at EleutherAI), Aakanksha Chowdhery (Lead author on PaLM and Gemini) and Alexis Conneau (Research Lead at OpenAI).", + "duration": 958, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Evan Smothers" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/43X9E25-Qg0/maxresdefault.webp", + "title": "torchtune: Easy and Accessible Finetuning in Native PyTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=43X9E25-Qg0" + } + ] +} diff --git a/pytorchconf-2024/videos/training-moes-at-scale-with-pytorch-mihir-patel-brian-chu-databricks.json b/pytorchconf-2024/videos/training-moes-at-scale-with-pytorch-mihir-patel-brian-chu-databricks.json new file mode 100644 index 000000000..6450167a6 --- /dev/null +++ b/pytorchconf-2024/videos/training-moes-at-scale-with-pytorch-mihir-patel-brian-chu-databricks.json @@ -0,0 +1,25 @@ +{ + "description": "Mixture-of-Experts (MoE) models are becoming an increasingly popular architecture choice for large language models (LLMs). In this talk, we describe how to train MoE models with PyTorch. After discussing various performance tradeoffs, we use PyTorch distributed tools like DTensor to build custom parallelism approaches, including expert parallelism via MegaBlocks. We then show how to get near-linear scaling to thousands of GPUs, combining PyTorch FSDP and HSDP with our parallelism strategies. We discuss many of the challenges of training at scale, including communication bottlenecks, hardware failures, and networking challenges. We further improve large-scale training setups using tools like PyTorch Distributed Checkpointing for rapid saving and loading. We then highlight further optimizations to minimize challenges only present at scale, such as object store failures for large checkpoints.", + "duration": 1132, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Mihir Patel", + "Brian Chu" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/f2OxAWRCmPo/maxresdefault.webp", + "title": "Training MoEs at Scale with PyTorch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=f2OxAWRCmPo" + } + ] +} diff --git a/pytorchconf-2024/videos/triton-maximizing-kernel-development-productivity-under-performance-constraints-philip-tillet.json b/pytorchconf-2024/videos/triton-maximizing-kernel-development-productivity-under-performance-constraints-philip-tillet.json new file mode 100644 index 000000000..b7fc6a97d --- /dev/null +++ b/pytorchconf-2024/videos/triton-maximizing-kernel-development-productivity-under-performance-constraints-philip-tillet.json @@ -0,0 +1,24 @@ +{ + "description": "Machine Learning research workflows are often bottlenecked by the development of compute kernels for new algorithms and GPU architectures. This process can be daunting, and often requires a careful trade-off between productivity and performance.
In this talk, we will discuss how Triton -- a mid-level programming language for kernel development -- approaches this multi-objective optimization problem, and the design decisions that were made to that effect.", + "duration": 601, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Philip Tillet" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/h449o7UijHk/maxresdefault.webp", + "title": "Maximizing Kernel Development Productivity Under Performance Constraints", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=h449o7UijHk" + } + ] +} diff --git a/pytorchconf-2024/videos/tvm-universally-deploy-large-language-models-via-ml-compilation-tianqi-chen-cmu-octoai.json b/pytorchconf-2024/videos/tvm-universally-deploy-large-language-models-via-ml-compilation-tianqi-chen-cmu-octoai.json new file mode 100644 index 000000000..ac26dbe79 --- /dev/null +++ b/pytorchconf-2024/videos/tvm-universally-deploy-large-language-models-via-ml-compilation-tianqi-chen-cmu-octoai.json @@ -0,0 +1,24 @@ +{ + "description": "Deploying deep learning models on various devices has become an important topic. Machine learning compilation is an emerging field that leverages compiler and automatic search techniques to accelerate AI models. ML compilation brings a unique set of challenges: emerging machine learning models; increasing hardware specialization with a diverse set of acceleration primitives; and a growing tension between flexibility and performance. In this talk, I discuss our experience in bringing foundation models to a variety of devices and hardware environments through machine learning compilation.", + "duration": 650, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Tianqi Chen" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/n66LLI_ebx0/maxresdefault.webp", + "title": "Universally Deploy Large-language Models via ML Compilation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=n66LLI_ebx0" + } + ] +} diff --git a/pytorchconf-2024/videos/understanding-the-llm-inference-workload-mark-moyou-nvidia.json b/pytorchconf-2024/videos/understanding-the-llm-inference-workload-mark-moyou-nvidia.json new file mode 100644 index 000000000..1635f196e --- /dev/null +++ b/pytorchconf-2024/videos/understanding-the-llm-inference-workload-mark-moyou-nvidia.json @@ -0,0 +1,24 @@ +{ + "description": "Understanding how to effectively size a production-grade LLM deployment requires understanding of the model(s), the compute hardware, quantization and parallelization methods, KV Cache budgets, input and output token length predictions, model adapter management and much more.
- Why LLM inference is different from standard deep learning inference\n- Current and future NVIDIA GPU overview: which GPU(s) for which models and why\n- Understanding the importance of building inference engines\n- Deep recap on the attention mechanism along with different types of popular attention mechanisms used in production\n- Deep dive on KV Cache and managing KV Cache budgets\n- Parallelism (reducing latency): mainly tensor parallelism, but data, sequence, pipeline, and expert parallelism will be highlighted\n- Quantization methods on weights, activations, and KV Cache to reduce engine sizes for more effective GPU utilization\n- Increasing throughput with inflight batching and other techniques\n- Detailed performance analysis of LLM deployments looking at time to first token, inter-token latencies, LLM deployment characterizations, and more that can help reduce deployment costs", + "duration": 2054, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Mark Moyou" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/z2M8gKGYws4/maxresdefault.webp", + "title": "Understanding the LLM Inference Workload", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=z2M8gKGYws4" + } + ] +} diff --git a/pytorchconf-2024/videos/unlocking-the-enigma-crafting-unbiased-transparent-and-explainable-large-languag-rashmi-nagpal.json b/pytorchconf-2024/videos/unlocking-the-enigma-crafting-unbiased-transparent-and-explainable-large-languag-rashmi-nagpal.json new file mode 100644 index 000000000..e67d1dfce --- /dev/null +++ b/pytorchconf-2024/videos/unlocking-the-enigma-crafting-unbiased-transparent-and-explainable-large-languag-rashmi-nagpal.json @@ -0,0 +1,24 @@ +{ + "description": "In an era where artificial intelligence reigns supreme, the statistics are both perplexing and thought-provoking \u2013 a mere 13% of large language models manage to transcend the realms of research and enter the practical world of production. Who bears the responsibility when these models err, spewing out biased or discriminatory outputs? It's time to demystify the complex landscape of machine learning ethics and carve a path towards a brighter, more accountable future! In this talk, firstly, we will navigate the profound impacts of large language models across diverse domains, from the lifesaving advances in medicine to safeguarding our nations through enhanced security protocols. Secondly, as we marvel at data-driven decisions made by these models, we will confront the darker shadows cast by the looming spectre of bias in the data. Finally, we will delve deep into the art of building interpretable models and navigating the maze of ethical considerations.
Through a live demonstration in PyTorch, we will witness how to craft unbiased, transparent, and explainable models.", + "duration": 2998, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Rashmi Nagpal" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/vGHb4jO2_Ks/maxresdefault.webp", + "title": "Unlocking the Enigma: Crafting Unbiased, Transparent, and Explainable Large Language Models", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=vGHb4jO2_Ks" + } + ] +} diff --git a/pytorchconf-2024/videos/vllm-easy-fast-and-cheap-llm-serving-for-everyone-woosuk-kwon-xiaoxuan-liu-uc-berkeley.json b/pytorchconf-2024/videos/vllm-easy-fast-and-cheap-llm-serving-for-everyone-woosuk-kwon-xiaoxuan-liu-uc-berkeley.json new file mode 100644 index 000000000..60e678f5e --- /dev/null +++ b/pytorchconf-2024/videos/vllm-easy-fast-and-cheap-llm-serving-for-everyone-woosuk-kwon-xiaoxuan-liu-uc-berkeley.json @@ -0,0 +1,25 @@ +{ + "description": "We will present vLLM, an open-source high-performance LLM inference engine built on top of PyTorch. Starting as a research project at UC Berkeley, vLLM has been one of the fastest and most popular LLM inference solutions in industry, reaching 20K+ stars and 350+ contributors. In this talk, we will cover how vLLM adopts various LLM inference optimizations and how it supports various AI accelerators such as AMD GPUs, Google TPUs, and AWS Inferentia. Also, we will discuss how vLLM benefits from PyTorch 2 and its ecosystem.", + "duration": 1412, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Woosuk Kwon", + "Xiaoxuan Liu" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/9ih0EmcXRHE/maxresdefault.webp", + "title": "vLLM: Easy, Fast, and Cheap LLM Serving for Everyone", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=9ih0EmcXRHE" + } + ] +} diff --git a/pytorchconf-2024/videos/welcome-to-the-pytorch-ecosystem-for-llm-fine-tuning-mini-summit-kartikay-khandelwal-meta.json b/pytorchconf-2024/videos/welcome-to-the-pytorch-ecosystem-for-llm-fine-tuning-mini-summit-kartikay-khandelwal-meta.json new file mode 100644 index 000000000..393385327 --- /dev/null +++ b/pytorchconf-2024/videos/welcome-to-the-pytorch-ecosystem-for-llm-fine-tuning-mini-summit-kartikay-khandelwal-meta.json @@ -0,0 +1,24 @@ +{ + "description": "As open-source LLMs have become more capable, a substantial ecosystem has developed around the fine-tuning of these models. A thriving community of researchers, developers, practitioners and hobbyists has emerged which focuses on topics ranging from memory efficiency, parameter-efficient fine-tuning and quantization to performance at scale and reproducible evaluations. The goal of this mini-summit is to bring this community together to discuss ideas, share knowledge and build connections.\n\nThe agenda features a keynote from Joe Spisak on the state of the Llama ecosystem followed by invited talks from the founders of Axolotl, Unsloth and torchtune. 
We conclude the summit with a riveting discussion on what\u2019s next for LLMs, fine-tuning and the PyTorch ecosystem with a fabulous panel of experts: Tim Dettmers (author of bitsandbytes and QLoRA), Hailey Schoelkopf (maintainer of LM Eval Harness at EleutherAI), Aakanksha Chowdhery (Lead author on PaLM and Gemini) and Alexis Conneau (Research Lead at OpenAI).", + "duration": 81, + "language": "eng", + "recorded": "2024-09-18", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pytorch.org/event/pytorch-conference-2024/" + } + ], + "speakers": [ + "Kartikay Khandelwal" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi_webp/Pe_VT5ReB3U/maxresdefault.webp", + "title": "Welcome to the PyTorch Ecosystem for LLM Fine-tuning Mini Summit", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Pe_VT5ReB3U" + } + ] +}