-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpapers.bib
More file actions
51 lines (47 loc) · 1.91 KB
/
papers.bib
File metadata and controls
51 lines (47 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
---
---
```bib
% arXiv preprint. The arXiv identifier is exposed through eprint/archiveprefix
% so eprint-aware styles can link it; the journal string is left unchanged so
% anything that already prints it keeps working.
% pdf, code, abbr, bibtex_show and selected are non-standard fields; standard
% bibliography styles ignore unknown fields.
% NOTE(review): the dagger after the first two author names is kept verbatim
% and parses as part of the surname -- presumably an equal-contribution mark;
% confirm. It is non-ASCII, so classic 8-bit BibTeX may sort it oddly.
@article{Liang2025InjectingAI,
  author        = {Yunkai Liang† and Zhangyu Chen† and Pengfei Zuo and Zhi Zhou and Xu Chen and Zhou Yu},
  title         = {Injecting Adrenaline into {LLM} Serving: Boosting Resource Utilization and Throughput via Attention Disaggregation},
  journal       = {arXiv preprint arXiv:2503.20552},
  year          = {2025},
  eprint        = {2503.20552},
  archiveprefix = {arXiv},
  pdf           = {https://arxiv.org/pdf/2503.20552},
  code          = {https://github.com/ASISys/Adrenaline},
  abbr          = {arXiv},
  bibtex_show   = {true},
  selected      = {true}
}
% arXiv preprint. The arXiv identifier is exposed through eprint/archiveprefix
% so eprint-aware styles can link it; the journal string is left unchanged so
% anything that already prints it keeps working.
% The acronym in the title is brace-protected so sentence-casing styles do not
% lowercase it. pdf, abbr, bibtex_show and selected are non-standard fields;
% standard bibliography styles ignore unknown fields.
@article{Zhou2025ProgressiveSA,
  author        = {Qihui Zhou and Peiqi Yin and Pengfei Zuo and James Cheng},
  title         = {Progressive Sparse Attention: Algorithm and System Co-design for Efficient Attention in {LLM} Serving},
  journal       = {arXiv preprint arXiv:2503.00392},
  year          = {2025},
  eprint        = {2503.00392},
  archiveprefix = {arXiv},
  pdf           = {https://arxiv.org/pdf/2503.00392},
  abbr          = {arXiv},
  bibtex_show   = {true},
  selected      = {true}
}
% Conference paper. The camel-case system name and the acronym in the title are
% brace-protected so sentence-casing styles do not turn them into "Adaskip" /
% "llm".
% The title is quote-delimited on purpose: a brace-delimited value would begin
% with two consecutive opening braces, which a template engine may read as a
% tag opener (NOTE(review): only matters if this file is template-processed,
% as the front matter at the top of the file suggests -- confirm).
% NOTE(review): the dagger after the first two author names is kept verbatim
% and parses as part of the surname -- presumably an equal-contribution mark;
% confirm. It is non-ASCII, so classic 8-bit BibTeX may sort it oddly.
@inproceedings{He2025AdaSkipAS,
  author      = {Zhuomin He† and Yizhen Yao† and Pengfei Zuo and Bin Gao and Qinya Li and Zhenzhe Zheng and Fan Wu},
  title       = "{AdaSkip}: Adaptive Sublayer Skipping for Accelerating Long-Context {LLM} Inference",
  booktitle   = {Proceedings of the 39th Annual AAAI Conference on Artificial Intelligence (AAAI)},
  year        = {2025},
  pdf         = {https://arxiv.org/pdf/2501.02336},
  code        = {https://github.com/ASISys/AdaSkip},
  abbr        = {AAAI},
  bibtex_show = {true},
  selected    = {true}
}
% Conference paper. The camel-case system name at the end of the title is
% brace-protected so sentence-casing styles do not lowercase it.
% pdf, video, abbr, bibtex_show and selected are non-standard fields; standard
% bibliography styles ignore unknown fields.
@inproceedings{Gao2024CostEfficientLL,
  author      = {Bin Gao and Zhuomin He and Puru Sharma and Qingxuan Kang and Djordje Jevdjic and Junbo Deng and Xingkun Yang and Zhou Yu and Pengfei Zuo},
  title       = {Cost-Efficient Large Language Model Serving for Multi-turn Conversations with {CachedAttention}},
  booktitle   = {Proceedings of the 2024 USENIX Annual Technical Conference (USENIX ATC)},
  year        = {2024},
  pdf         = {https://www.usenix.org/system/files/atc24-gao-bin-cost.pdf},
  video       = {https://www.usenix.org/conference/atc24/presentation/gao-bin-cost},
  abbr        = {USENIX ATC},
  bibtex_show = {true},
  selected    = {true}
}
```