publications | Pengrui Lu

2026

arXiv

ProjDevBench: Benchmarking AI Coding Agents on End-to-End Project Development

Pengrui Lu^*, Shiqi Zhang^*, Yunzhong Hou^*, and 8 more authors

arXiv preprint arXiv:2602.01655, 2026

@article{lu2026projdevbench,
  author        = {Lu, Pengrui and Zhang, Shiqi and Hou, Yunzhong and Ye, Lyumanshan and Huang, Chaoyi and Chen, Zixi and Zeng, Ji and Jiang, Hantao and Liu, Pengfei and Wang, Yiwei and Yang, Ming-Hsuan},
  title         = {{ProjDevBench}: Benchmarking {AI} Coding Agents on End-to-End Project Development},
  journal       = {arXiv preprint},
  year          = {2026},
  eprint        = {2602.01655},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2602.01655},
}

2025

arXiv

InnovatorBench: Evaluating Agents’ Ability to Conduct Innovative LLM Research

Y. Wu, D. Fu, W. Si, and 9 more authors

arXiv preprint arXiv:2510.27598, 2025

Abs arXiv Bib

@article{wu2025innovatorbench,
  author        = {Wu, Y. and Fu, D. and Si, W. and Huang, Z. and Jiang, M. and Li, K. and Xia, S. and Sun, J. and Xu, T. and Hu, X. and Lu, Pengrui and others},
  title         = {{InnovatorBench}: Evaluating Agents' Ability to Conduct Innovative {LLM} Research},
  journal       = {arXiv preprint},
  year          = {2025},
  eprint        = {2510.27598},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2510.27598},
}

arXiv

Interaction as Intelligence Part II: Asynchronous Human-Agent Rollout for Long-Horizon Task Training

D. Fu, Y. Wu, X. Cai, and 9 more authors

arXiv preprint arXiv:2510.27630, 2025

Abs arXiv Bib

@article{fu2025interaction2,
  author        = {Fu, D. and Wu, Y. and Cai, X. and Ye, L. and Xia, S. and Huang, Z. and Si, W. and Xu, T. and Sun, J. and Li, K. and Jiang, M. and others},
  title         = {Interaction as Intelligence Part {II}: Asynchronous Human-Agent Rollout for Long-Horizon Task Training},
  journal       = {arXiv preprint},
  year          = {2025},
  eprint        = {2510.27630},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2510.27630},
}

arXiv

Interaction as Intelligence: Deep Research With Human-AI Partnership

L. Ye, X. Cai, X. Wang, and 8 more authors

arXiv preprint arXiv:2507.15759, 2025

Abs arXiv Bib

@article{ye2025interaction,
  author        = {Ye, L. and Cai, X. and Wang, X. and Wang, J. and Hu, X. and Su, J. and Nan, Y. and Wang, S. and Zhang, B. and Fan, X. and others},
  title         = {Interaction as Intelligence: Deep Research With Human-{AI} Partnership},
  journal       = {arXiv preprint},
  year          = {2025},
  eprint        = {2507.15759},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2507.15759},
}

arXiv

ParaCook: On Time-Efficient Planning for Multi-Agent Systems

S. Zhang, X. Ma, Y. Xu, and 7 more authors

arXiv preprint arXiv:2510.11608, 2025

Abs arXiv Bib

@article{zhang2025paracook,
  author        = {Zhang, S. and Ma, X. and Xu, Y. and Cao, Z. and Lu, Pengrui and Yuan, H. and Shen, T. and Zhang, Z. and Zhao, H. and others},
  title         = {{ParaCook}: On Time-Efficient Planning for Multi-Agent Systems},
  journal       = {arXiv preprint},
  year          = {2025},
  eprint        = {2510.11608},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2510.11608},
}

arXiv
ResearcherBench: Evaluating Deep AI Research Systems on the Frontiers of Scientific Inquiry

Tianze Xu^*, Pengrui Lu^*, Lyumanshan Ye, and 2 more authors

arXiv preprint arXiv:2507.16280, 2025

Abs arXiv Bib HTML Code

The first benchmark focused on evaluating the capabilities of Deep AI Research Systems (DARS) on frontier AI scientific questions, featuring 65 expertly curated research questions across 35 distinct AI research subjects with a dual assessment framework.
@article{lu2025researcherbench,
  author        = {Xu, Tianze and Lu, Pengrui and Ye, Lyumanshan and Hu, Xiangkun and Liu, Pengfei},
  title         = {{ResearcherBench}: Evaluating Deep {AI} Research Systems on the Frontiers of Scientific Inquiry},
  journal       = {arXiv preprint},
  year          = {2025},
  eprint        = {2507.16280},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2507.16280},
}
arXiv
DeepResearcher: Scaling Deep Research via Reinforcement Learning in Real-world Environments

Yuxiang Zheng, Dayuan Fu, Xiangkun Hu, and 4 more authors

arXiv preprint arXiv:2504.03160, 2025

Abs arXiv Bib HTML Code

The first comprehensive framework for end-to-end training of LLM-based deep research agents through scaling reinforcement learning in real-world environments with authentic web search interactions.
@article{zheng2025deepresearcher,
  author        = {Zheng, Yuxiang and Fu, Dayuan and Hu, Xiangkun and Cai, Xiaojie and Ye, Lyumanshan and Lu, Pengrui and Liu, Pengfei},
  title         = {{DeepResearcher}: Scaling Deep Research via Reinforcement Learning in Real-world Environments},
  journal       = {arXiv preprint},
  year          = {2025},
  eprint        = {2504.03160},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2504.03160},
}

arXiv

Deep Cognition: A Multi-Agent Framework for Collaborative Research with Real-Time Cognitive Oversight

L. Ye, X. Cai, X. Wang, and 8 more authors

arXiv preprint, 2025

Abs Bib

@article{ye2025deepcognition,
  author  = {Ye, L. and Cai, X. and Wang, X. and Wang, J. and Hu, X. and Su, J. and Nan, Y. and Luo, J. and Wang, S. and Fan, X. and others},
  title   = {Deep Cognition: A Multi-Agent Framework for Collaborative Research with Real-Time Cognitive Oversight},
  journal = {arXiv preprint},
  year    = {2025},
}