You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
@@ -117,14 +127,11 @@ More detailed training settings can be found in the [oreal/configs](./oreal/conf
117
127
## 🖊️ Citation
118
128
119
129
```
120
-
@misc{lyu2025exploringlimitoutcomereward,
121
-
title={Exploring the Limit of Outcome Reward for Learning Mathematical Reasoning},
122
-
author={Chengqi Lyu and Songyang Gao and Yuzhe Gu and Wenwei Zhang and Jianfei Gao and Kuikun Liu and Ziyi Wang and Shuaibin Li and Qian Zhao and Haian Huang and Weihan Cao and Jiangning Liu and Hongwei Liu and Junnan Liu and Songyang Zhang and Dahua Lin and Kai Chen},
123
-
year={2025},
124
-
eprint={2502.06781},
125
-
archivePrefix={arXiv},
126
-
primaryClass={cs.CL},
127
-
url={https://arxiv.org/abs/2502.06781},
130
+
@article{lyu2025exploring,
131
+
title={Exploring the Limit of Outcome Reward for Learning Mathematical Reasoning},
132
+
author={Lyu, Chengqi and Gao, Songyang and Gu, Yuzhe and Zhang, Wenwei and Gao, Jianfei and Liu, Kuikun and Wang, Ziyi and Li, Shuaibin and Zhao, Qian and Huang, Haian and others},
0 commit comments