20:50
Research
Open, rigorous, and transparent
We drive rigorous and transparent research with open benchmarks, models, and agent systems that advance the science of financial AI.
Research
Selected papers from the community, spanning open benchmarks, financial language models, and agent systems. Full, up-to-date list on Google Scholar; open benchmarks, datasets, and models on Hugging Face.
All publications on Google Scholar →
Benchmarks & models on Hugging Face →
2026
-
When Agents Trade: Live Multi-Market Trading Arena for LLM Agents
Space Agent-Market-Arena 9 ♡
@inproceedings{DBLP:conf/www/QianPSHHLCYXLWZ26,
  author    = {Lingfei Qian and Xueqing Peng and Hanley Smith and Yi Han and
               Yueru He and Haohang Li and Yupeng Cao and Yangyang Yu and
               Guojun Xiong and Peng Lu and Yan Wang and Vincent Jim Zhang and
               Huan He and Alejandro Lopez{-}Lira and Jimin Huang and
               Jian{-}Yun Nie and Sophia Ananiadou},
  title     = {When Agents Trade: Live Multi-Market Trading Arena for {LLM} Agents},
  editor    = {Hakim Hacid and Yoelle Maarek and Francesco Bonchi and Ido Guy and
               Emine Yilmaz},
  booktitle = {Proceedings of the {ACM} Web Conference 2026, {WWW} 2026, Dubai, United Arab Emirates, originally scheduled for April 13-17, 2026, rescheduled for June 29 - July 3, 2026},
  pages     = {7833--7844},
  publisher = {{ACM}},
  year      = {2026},
  doi       = {10.1145/3774904.3792821},
  url       = {https://doi.org/10.1145/3774904.3792821},
} -
Herculean: An Agentic Benchmark for Financial Intelligence
Dataset Herculean 878 ↓
@misc{thefin_herculean,
  author        = {X. Peng and Z. Xie and Y. Cao and H. Li and L. Qian and Y. Wang and
                   V. J. Zhang and H. He and X. Ai and L. Ma and others},
  title         = {Herculean: An Agentic Benchmark for Financial Intelligence},
  year          = {2026},
  eprint        = {2605.14355},
  archivePrefix = {arXiv},
} -
FinReporting: An Agentic Workflow for Localized Reporting of Cross-Jurisdiction Financial Disclosures
@article{DBLP:journals/corr/abs-2604-05966,
  author     = {Fan Zhang and Mingzi Song and Rania Elbadry and Yankai Chen and
                Shaobo Wang and Yixi Zhou and Xunwen Zheng and Yueru He and
                Yuyang Dai and Georgi Georgiev and Ayesha Gull and
                Muhammad Usman Safder and Fan Wu and Liyuan Meng and
                Fengxian Ji and Junning Zhao and Xueqing Peng and Jimin Huang and
                Yu Chen and Xue Liu and Preslav Nakov and Zhuohan Xie},
  title      = {{FinReporting}: An Agentic Workflow for Localized Reporting of Cross-Jurisdiction Financial Disclosures},
  journal    = {CoRR},
  volume     = {abs/2604.05966},
  year       = {2026},
  eprinttype = {arXiv},
  eprint     = {2604.05966},
  doi        = {10.48550/ARXIV.2604.05966},
  url        = {https://doi.org/10.48550/arXiv.2604.05966},
} -
Can LLM Agents Be CFOs? A Benchmark for Resource Allocation in Dynamic Enterprise Environments
@article{DBLP:journals/corr/abs-2603-23638,
  author     = {Yi Han and Lingfei Qian and Yan Wang and Yueru He and Xueqing Peng and
                Dongji Feng and Yankai Chen and Haohang Li and Yupeng Cao and
                Jimin Huang and Xue Liu and Jian{-}Yun Nie and Sophia Ananiadou},
  title      = {Can {LLM} Agents Be {CFOs}? {A} Benchmark for Resource Allocation in Dynamic Enterprise Environments},
  journal    = {CoRR},
  volume     = {abs/2603.23638},
  year       = {2026},
  eprinttype = {arXiv},
  eprint     = {2603.23638},
  doi        = {10.48550/ARXIV.2603.23638},
  url        = {https://doi.org/10.48550/arXiv.2603.23638},
} -
Conv-FinRe: A Conversational and Longitudinal Benchmark for Utility-Grounded Financial Recommendation
@article{DBLP:journals/corr/abs-2602-16990,
  author     = {Yan Wang and Yi Han and Lingfei Qian and Yueru He and Xueqing Peng and
                Dongji Feng and Zhuohan Xie and Vincent Jim Zhang and Rosie Guo and
                Fengran Mo and Jimin Huang and Yankai Chen and Xue Liu and
                Jian{-}Yun Nie},
  title      = {{Conv-FinRe}: {A} Conversational and Longitudinal Benchmark for Utility-Grounded Financial Recommendation},
  journal    = {CoRR},
  volume     = {abs/2602.16990},
  year       = {2026},
  eprinttype = {arXiv},
  eprint     = {2602.16990},
  doi        = {10.48550/ARXIV.2602.16990},
  url        = {https://doi.org/10.48550/arXiv.2602.16990},
} -
Ebisu: Benchmarking Large Language Models in Japanese Finance
@article{DBLP:journals/corr/abs-2602-01479,
  author     = {Xueqing Peng and Ruoyu Xiang and Fan Zhang and Mingzi Song and
                Mingyang Jiang and Yan Wang and Lingfei Qian and Taiki Hara and
                Yuqing Guo and Jimin Huang and Junichi Tsujii and Sophia Ananiadou},
  title      = {Ebisu: Benchmarking Large Language Models in {Japanese} Finance},
  journal    = {CoRR},
  volume     = {abs/2602.01479},
  year       = {2026},
  eprinttype = {arXiv},
  eprint     = {2602.01479},
  doi        = {10.48550/ARXIV.2602.01479},
  url        = {https://doi.org/10.48550/arXiv.2602.01479},
} -
Same Claim, Different Judgment: Benchmarking Scenario-Induced Bias in Multilingual Financial Misinformation Detection
@article{DBLP:journals/corr/abs-2601-05403,
  author     = {Zhiwei Liu and Yupeng Cao and Yuechen Jiang and Mohsinul Kabir and
                Polydoros Giannouris and Chen Xu and Ziyang Xu and Tianlei Zhu and
                Tariquzzaman Faisal and Triantafillos Papadopoulos and Yan Wang and
                Lingfei Qian and Xueqing Peng and Zhuohan Xie and Ye Yuan and
                Saeed Almheiri and Abdulrazzaq Alnajjar and Mingbin Chen and
                Harry Stuart and Paul Thompson and Prayag Tiwari and
                Alejandro Lopez{-}Lira and Xue Liu and Jimin Huang and
                Sophia Ananiadou},
  title      = {Same Claim, Different Judgment: Benchmarking Scenario-Induced Bias in Multilingual Financial Misinformation Detection},
  journal    = {CoRR},
  volume     = {abs/2601.05403},
  year       = {2026},
  eprinttype = {arXiv},
  eprint     = {2601.05403},
  doi        = {10.48550/ARXIV.2601.05403},
  url        = {https://doi.org/10.48550/arXiv.2601.05403},
} -
All That Glisters Is Not Gold: A Benchmark for Reference-Free Counterfactual Financial Misinformation Detection
@article{DBLP:journals/corr/abs-2601-04160,
  author     = {Yuechen Jiang and Zhiwei Liu and Yupeng Cao and Yueru He and
                Ziyang Xu and Chen Xu and Zhiyang Deng and Prayag Tiwari and
                Xi Chen and Alejandro Lopez{-}Lira and Jimin Huang and
                Junichi Tsujii and Sophia Ananiadou},
  title      = {All That Glisters Is Not Gold: {A} Benchmark for Reference-Free Counterfactual Financial Misinformation Detection},
  journal    = {CoRR},
  volume     = {abs/2601.04160},
  year       = {2026},
  eprinttype = {arXiv},
  eprint     = {2601.04160},
  doi        = {10.48550/ARXIV.2601.04160},
  url        = {https://doi.org/10.48550/arXiv.2601.04160},
} -
The Illusion of Specialization: Unveiling the Domain-Invariant "Standing Committee" in Mixture-of-Experts Models
@article{DBLP:journals/corr/abs-2601-03425,
  author     = {Yan Wang and Yitao Xu and Nanhan Shen and Jinyan Su and
                Jimin Huang and Zining Zhu},
  title      = {The Illusion of Specialization: Unveiling the Domain-Invariant {``Standing Committee''} in Mixture-of-Experts Models},
  journal    = {CoRR},
  volume     = {abs/2601.03425},
  year       = {2026},
  eprinttype = {arXiv},
  eprint     = {2601.03425},
  doi        = {10.48550/ARXIV.2601.03425},
  url        = {https://doi.org/10.48550/arXiv.2601.03425},
} -
The CLEF-2026 FinMMEval Lab: Multilingual and Multimodal Evaluation of Financial AI Systems
@inproceedings{DBLP:conf/ecir/XieEZGPQHDJDGWKSN26,
  author    = {Zhuohan Xie and Rania Elbadry and Fan Zhang and Georgi Georgiev and
               Xueqing Peng and Lingfei Qian and Jimin Huang and Dimitar Dimitrov and
               Vanshikaa Jani and Yuyang Dai and Jiahui Geng and Yuxia Wang and
               Ivan Koychev and Veselin Stoyanov and Preslav Nakov},
  title     = {The {CLEF-2026} {FinMMEval} Lab: Multilingual and Multimodal Evaluation of Financial {AI} Systems},
  editor    = {Ricardo Campos and Adam Jatowt and Yanyan Lan and
               Mohammad Aliannejadi and Christine Bauer and Sean MacAvaney and
               Avishek Anand and Zhaochun Ren and Suzan Verberne and Nan Bai and
               Masoud Mansoury},
  booktitle = {Advances in Information Retrieval - 48th European Conference on Information Retrieval, {ECIR} 2026, Delft, The Netherlands, March 29 - April 2, 2026, Proceedings, Part {IV}},
  series    = {Lecture Notes in Computer Science},
  pages     = {267--276},
  publisher = {Springer},
  year      = {2026},
  doi       = {10.1007/978-3-032-21321-1_37},
  url       = {https://doi.org/10.1007/978-3-032-21321-1\_37},
}
2025
-
InvestorBench: A Benchmark for Financial Decision-Making Tasks with LLM-based Agent
@inproceedings{DBLP:conf/acl/LiCYJDHJ0SHQPSX25,
  author    = {Haohang Li and Yupeng Cao and Yangyang Yu and
               Shashidhar Reddy Javaji and Zhiyang Deng and Yueru He and
               Yuechen Jiang and Zining Zhu and K. P. Subbalakshmi and
               Jimin Huang and Lingfei Qian and Xueqing Peng and
               Jordan W. Suchow and Qianqian Xie},
  title     = {{INVESTORBENCH:} {A} Benchmark for Financial Decision-Making Tasks with {LLM}-based Agent},
  editor    = {Wanxiang Che and Joyce Nabende and Ekaterina Shutova and
               Mohammad Taher Pilehvar},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), {ACL} 2025, Vienna, Austria, July 27 - August 1, 2025},
  pages     = {2509--2525},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  url       = {https://aclanthology.org/2025.acl-long.126/},
} -
Plutus: Benchmarking Large Language Models in Low-Resource Greek Finance
Model plutus-8B-instruct 45 ↓ · 10 ♡
Datasets Plutus Greek tasks (×9) 617 ↓
Space Open-Greek-Financial-LLM-Leaderboard 10 ♡
@inproceedings{DBLP:conf/emnlp/PengPSGXWQHXA25,
  author    = {Xueqing Peng and Triantafillos Papadopoulos and Efstathia Soufleri and
               Polydoros Giannouris and Ruoyu Xiang and Yan Wang and Lingfei Qian and
               Jimin Huang and Qianqian Xie and Sophia Ananiadou},
  title     = {Plutus: Benchmarking Large Language Models in Low-Resource {Greek} Finance},
  editor    = {Christos Christodoulopoulos and Tanmoy Chakraborty and Carolyn Rose and
               Violet Peng},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2025, Suzhou, China, November 4-9, 2025},
  pages     = {30176--30202},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  doi       = {10.18653/V1/2025.EMNLP-MAIN.1535},
  url       = {https://doi.org/10.18653/v1/2025.emnlp-main.1535},
} -
Selective Preference Optimization via Token-Level Reward Function Estimation
@inproceedings{DBLP:conf/emnlp/YangLXHMA25,
  author    = {Kailai Yang and Zhiwei Liu and Qianqian Xie and Jimin Huang and
               Erxue Min and Sophia Ananiadou},
  title     = {Selective Preference Optimization via Token-Level Reward Function Estimation},
  editor    = {Christos Christodoulopoulos and Tanmoy Chakraborty and Carolyn Rose and
               Violet Peng},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2025, Suzhou, China, November 4-9, 2025},
  pages     = {7032--7056},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  doi       = {10.18653/V1/2025.EMNLP-MAIN.359},
  url       = {https://doi.org/10.18653/v1/2025.emnlp-main.359},
} -
FLAG-Trader: Fusion LLM-Agent with Gradient-Based Reinforcement Learning for Financial Trading
@inproceedings{DBLP:conf/acl/XiongDWCLYPLSLH25,
  author    = {Guojun Xiong and Zhiyang Deng and Keyi Wang and Yupeng Cao and
               Haohang Li and Yangyang Yu and Xueqing Peng and Mingquan Lin and
               Kaleb E. Smith and Xiao{-}Yang Liu and Jimin Huang and
               Sophia Ananiadou and Qianqian Xie},
  title     = {{FLAG-TRADER:} Fusion {LLM}-Agent with Gradient-based Reinforcement Learning for Financial Trading},
  editor    = {Wanxiang Che and Joyce Nabende and Ekaterina Shutova and
               Mohammad Taher Pilehvar},
  booktitle = {Findings of the Association for Computational Linguistics, {ACL} 2025, Vienna, Austria, July 27 - August 1, 2025},
  series    = {Findings of {ACL}},
  pages     = {13921--13934},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  url       = {https://aclanthology.org/2025.findings-acl.716/},
} -
UCFE: A User-Centric Financial Expertise Benchmark for Large Language Models
@inproceedings{DBLP:conf/naacl/YangZHGGHLZWXHYW25,
  author    = {Yuzhe Yang and Yifei Zhang and Yan Hu and Yilin Guo and Ruoli Gan and
               Yueru He and Mingcong Lei and Xiao Zhang and Haining Wang and
               Qianqian Xie and Jimin Huang and Honghai Yu and Benyou Wang},
  title     = {{UCFE:} {A} User-Centric Financial Expertise Benchmark for Large Language Models},
  editor    = {Luis Chiruzzo and Alan Ritter and Lu Wang},
  booktitle = {Findings of the Association for Computational Linguistics: {NAACL} 2025, Albuquerque, New Mexico, USA, April 29 - May 4, 2025},
  series    = {Findings of {ACL}},
  pages     = {5429--5448},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  doi       = {10.18653/V1/2025.FINDINGS-NAACL.300},
  url       = {https://doi.org/10.18653/v1/2025.findings-naacl.300},
} -
UCL-Bench: A Chinese User-Centric Legal Benchmark for Large Language Models
@inproceedings{DBLP:conf/naacl/GanFZLJWCCXHW25,
  author    = {Ruoli Gan and Duanyu Feng and Chen Zhang and Zhihang Lin and
               Haochen Jia and Hao Wang and Zhenyang Cai and Lei Cui and
               Qianqian Xie and Jimin Huang and Benyou Wang},
  title     = {{UCL-Bench}: {A} {Chinese} User-Centric Legal Benchmark for Large Language Models},
  editor    = {Luis Chiruzzo and Alan Ritter and Lu Wang},
  booktitle = {Findings of the Association for Computational Linguistics: {NAACL} 2025, Albuquerque, New Mexico, USA, April 29 - May 4, 2025},
  series    = {Findings of {ACL}},
  pages     = {7945--7988},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  doi       = {10.18653/V1/2025.FINDINGS-NAACL.444},
  url       = {https://doi.org/10.18653/v1/2025.findings-naacl.444},
} -
LAiW: A Chinese Legal Large Language Models Benchmark
@inproceedings{DBLP:conf/coling/DaiFHJXZHTW25,
  author    = {Yongfu Dai and Duanyu Feng and Jimin Huang and Haochen Jia and
               Qianqian Xie and Yifang Zhang and Weiguang Han and Wei Tian and
               Hao Wang},
  title     = {{LAiW}: {A} {Chinese} Legal Large Language Models Benchmark},
  editor    = {Owen Rambow and Leo Wanner and Marianna Apidianaki and
               Hend Al{-}Khalifa and Di Eugenio, Barbara and Steven Schockaert},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics, {COLING} 2025, Abu Dhabi, UAE, January 19-24, 2025},
  pages     = {10738--10766},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  url       = {https://aclanthology.org/2025.coling-main.716/},
} -
FMDLLaMA: Financial Misinformation Detection Based on Large Language Models
Space FMD2025 5 ♡
@inproceedings{DBLP:conf/www/LiuZYXHA25,
  author    = {Zhiwei Liu and Xin Zhang and Kailai Yang and Qianqian Xie and
               Jimin Huang and Sophia Ananiadou},
  title     = {{FMDLlama}: Financial Misinformation Detection Based on Large Language Models},
  editor    = {Guodong Long and Michael Blumenstein and Yi Chang and
               Liane Lewin{-}Eytan and Zi Helen Huang and Elad Yom{-}Tov},
  booktitle = {Companion Proceedings of the {ACM} on Web Conference 2025, {WWW} 2025, Sydney, NSW, Australia, 28 April 2025 - 2 May 2025},
  pages     = {1153--1157},
  publisher = {{ACM}},
  year      = {2025},
  doi       = {10.1145/3701716.3715599},
  url       = {https://doi.org/10.1145/3701716.3715599},
} -
FinCriticalED: A Visual Benchmark for Financial Fact-Level OCR Evaluation
Dataset FinCriticalED 88 ↓ · 1 ♡
@article{DBLP:journals/corr/abs-2511-14998,
  author     = {Yueru He and Xueqing Peng and Yupeng Cao and Yan Wang and
                Lingfei Qian and Haohang Li and Yi Han and Ruoyu Xiang and
                Mingquan Lin and Prayag Tiwari and Jimin Huang and Guojun Xiong and
                Sophia Ananiadou},
  title      = {{FinCriticalED}: {A} Visual Benchmark for Financial Fact-Level {OCR} Evaluation},
  journal    = {CoRR},
  volume     = {abs/2511.14998},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2511.14998},
  doi        = {10.48550/ARXIV.2511.14998},
  url        = {https://doi.org/10.48550/arXiv.2511.14998},
} -
FinAuditing: A Financial Taxonomy-Structured Multi-Document Benchmark for Evaluating LLMs
@article{DBLP:journals/corr/abs-2510-08886,
  author     = {Yan Wang and Keyi Wang and Shanshan Yang and Jaisal Patel and
                Jeff Zhao and Fengran Mo and Xueqing Peng and Lingfei Qian and
                Jimin Huang and Guojun Xiong and Xiao{-}Yang Liu and Jian{-}Yun Nie},
  title      = {{FinAuditing}: {A} Financial Taxonomy-Structured Multi-Document Benchmark for Evaluating {LLMs}},
  journal    = {CoRR},
  volume     = {abs/2510.08886},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2510.08886},
  doi        = {10.48550/ARXIV.2510.08886},
  url        = {https://doi.org/10.48550/arXiv.2510.08886},
} -
MultiFinBen: Benchmarking Large Language Models for Multilingual and Multimodal Financial Application
Datasets MultiFinBen tasks (×4) 589 ↓
Datasets PolyFiQA tasks (×4) 316 ↓
@misc{thefin_multifinben,
  author        = {X. Peng and L. Qian and Y. Wang and R. Xiang and Y. He and Y. Ren and
                   M. Jiang and V. J. Zhang and others},
  title         = {{MultiFinBen}: Benchmarking Large Language Models for Multilingual and Multimodal Financial Application},
  year          = {2025},
  eprint        = {2506.14028},
  archivePrefix = {arXiv},
} -
RKEFino1: A Regulation Knowledge-Enhanced Large Language Model
@misc{thefin_rkefino1,
  author        = {Y. Wang and Y. He and R. Xiang and J. Zhao},
  title         = {{RKEFino1}: A Regulation Knowledge-Enhanced Large Language Model},
  year          = {2025},
  eprint        = {2506.05700},
  archivePrefix = {arXiv},
} -
FinChain: A Symbolic Benchmark for Verifiable Chain-of-Thought Financial Reasoning
@misc{thefin_finchain,
  author        = {Z. Xie and D. Orel and R. Thareja and D. Sahnan and H. Madmoun and
                   F. Zhang and D. Banerjee and others},
  title         = {{FinChain}: A Symbolic Benchmark for Verifiable Chain-of-Thought Financial Reasoning},
  year          = {2025},
  eprint        = {2506.02515},
  archivePrefix = {arXiv},
} -
MMAffBen: A Multilingual and Multimodal Affective Analysis Benchmark for Evaluating LLMs and VLMs
@article{DBLP:journals/corr/abs-2505-24423,
  author     = {Zhiwei Liu and Lingfei Qian and Qianqian Xie and Jimin Huang and
                Kailai Yang and Sophia Ananiadou},
  title      = {{MMAFFBen}: {A} Multilingual and Multimodal Affective Analysis Benchmark for Evaluating {LLMs} and {VLMs}},
  journal    = {CoRR},
  volume     = {abs/2505.24423},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2505.24423},
  doi        = {10.48550/ARXIV.2505.24423},
  url        = {https://doi.org/10.48550/arXiv.2505.24423},
} -
FinTagging: An LLM-Ready Benchmark for Extracting and Structuring Financial Information
Datasets FinTagging tasks (×5) 88 ↓
@article{DBLP:journals/corr/abs-2505-20650,
  author     = {Yan Wang and Yang Ren and Lingfei Qian and Xueqing Peng and
                Keyi Wang and Yi Han and Dongji Feng and Xiao{-}Yang Liu and
                Jimin Huang and Qianqian Xie},
  title      = {{FinTagging}: An {LLM}-ready Benchmark for Extracting and Structuring Financial Information},
  journal    = {CoRR},
  volume     = {abs/2505.20650},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2505.20650},
  doi        = {10.48550/ARXIV.2505.20650},
  url        = {https://doi.org/10.48550/arXiv.2505.20650},
} -
FinAudio: A Benchmark for Audio Large Language Models in Financial Applications
@article{DBLP:journals/corr/abs-2503-20990,
  author     = {Yupeng Cao and Haohang Li and Yangyang Yu and
                Shashidhar Reddy Javaji and Yueru He and Jimin Huang and
                Zining Zhu and Qianqian Xie and Xiao{-}Yang Liu and
                Koduvayur Subbalakshmi and Meikang Qiu and Sophia Ananiadou and
                Jian{-}Yun Nie},
  title      = {{FinAudio}: {A} Benchmark for Audio Large Language Models in Financial Applications},
  journal    = {CoRR},
  volume     = {abs/2503.20990},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2503.20990},
  doi        = {10.48550/ARXIV.2503.20990},
  url        = {https://doi.org/10.48550/arXiv.2503.20990},
} -
OrdRankBen: A Novel Ranking Benchmark for Ordinal Relevance in NLP
@article{DBLP:journals/corr/abs-2503-00674,
  author     = {Yan Wang and Lingfei Qian and Xueqing Peng and Jimin Huang and
                Dongji Feng},
  title      = {{OrdRankBen}: {A} Novel Ranking Benchmark for Ordinal Relevance in {NLP}},
  journal    = {CoRR},
  volume     = {abs/2503.00674},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2503.00674},
  doi        = {10.48550/ARXIV.2503.00674},
  url        = {https://doi.org/10.48550/arXiv.2503.00674},
} -
Fino1: On the Transferability of Reasoning Enhanced LLMs to Finance
Model Fino1-8B 608 ↓ · 35 ♡
Model Fino1-14B 278 ↓
Model Fin-o1-8B 146 ↓ · 11 ♡
Model Fin-o1-14B 92 ↓ · 6 ♡
Dataset FinCoT 3.7k ↓ · 15 ♡
Dataset Fino1_Reasoning_Path_FinQA 3.6k ↓ · 40 ♡
Space open-finllm-reasoning-leaderboard 8 ♡
@article{DBLP:journals/corr/abs-2502-08127,
  author     = {Lingfei Qian and Weipeng Zhou and Yan Wang and Xueqing Peng and
                Jimin Huang and Qianqian Xie},
  title      = {Fino1: On the Transferability of Reasoning Enhanced {LLMs} to Finance},
  journal    = {CoRR},
  volume     = {abs/2502.08127},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2502.08127},
  doi        = {10.48550/ARXIV.2502.08127},
  url        = {https://doi.org/10.48550/arXiv.2502.08127},
} -
Retrieval-Augmented Large Language Models for Financial Time Series Forecasting
Model StockLLM 278 ↓ · 4 ♡
Model FinSeer 9 ↓ · 2 ♡
@article{DBLP:journals/corr/abs-2502-05878,
  author     = {Mengxi Xiao and Zihao Jiang and Lingfei Qian and Zhengyu Chen and
                Yueru He and Yijing Xu and Yuechen Jiang and Dong Li and
                Ruey{-}Ling Weng and Min Peng and Jimin Huang and
                Sophia Ananiadou and Qianqian Xie},
  title      = {Enhancing Financial Time-Series Forecasting with Retrieval-Augmented Large Language Models},
  journal    = {CoRR},
  volume     = {abs/2502.05878},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2502.05878},
  doi        = {10.48550/ARXIV.2502.05878},
  url        = {https://doi.org/10.48550/arXiv.2502.05878},
} -
Open FinLLM Leaderboard: Towards Financial AI Readiness
Space Open-FinLLM-Leaderboard 7 ♡
@article{DBLP:journals/corr/abs-2501-10963,
  author     = {Shengyuan Colin Lin and Felix Tian and Keyi Wang and Xingjian Zhao and
                Jimin Huang and Qianqian Xie and Luca Borella and Matt White and
                Christina Dan Wang and Kairong Xiao and Xiao{-}Yang Liu Yanglet and
                Li Deng},
  title      = {Open {FinLLM} Leaderboard: Towards Financial {AI} Readiness},
  journal    = {CoRR},
  volume     = {abs/2501.10963},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2501.10963},
  doi        = {10.48550/ARXIV.2501.10963},
  url        = {https://doi.org/10.48550/arXiv.2501.10963},
}
2024
-
FinCon: A Synthesized LLM Multi-Agent System with Conceptual Verbal Reinforcement for Enhanced Financial Decision Making
@inproceedings{DBLP:conf/nips/YuYLDJCCSCLXZSX24,
  author    = {Yangyang Yu and Zhiyuan Yao and Haohang Li and Zhiyang Deng and
               Yuechen Jiang and Yupeng Cao and Zhi Chen and Jordan W. Suchow and
               Zhenyu Cui and Rong Liu and Zhaozhuo Xu and Denghui Zhang and
               Koduvayur Subbalakshmi and Guojun Xiong and Yueru He and
               Jimin Huang and Dong Li and Qianqian Xie},
  title     = {{FinCon}: {A} Synthesized {LLM} Multi-Agent System with Conceptual Verbal Reinforcement for Enhanced Financial Decision Making},
  editor    = {Amir Globerson and Lester Mackey and Danielle Belgrave and
               Angela Fan and Ulrich Paquet and Jakub M. Tomczak and Cheng Zhang},
  booktitle = {Advances in Neural Information Processing Systems 37: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024},
  year      = {2024},
  url       = {http://papers.nips.cc/paper\_files/paper/2024/hash/f7ae4fe91d96f50abc2211f09b6a7e49-Abstract-Conference.html},
} -
FinBen: A Holistic Financial Benchmark for Large Language Models
Datasets FinBen tasks (×2) 385 ↓
Datasets FLARE benchmark tasks (×64) 4.6k ↓
Space IJCAI-2024-FinLLM-Learderboard 4 ♡
@inproceedings{DBLP:conf/nips/XieHCXZHXLDFXKK24,
  author    = {Qianqian Xie and Weiguang Han and Zhengyu Chen and Ruoyu Xiang and
               Xiao Zhang and Yueru He and Mengxi Xiao and Dong Li and
               Yongfu Dai and Duanyu Feng and Yijing Xu and Haoqiang Kang and
               Ziyan Kuang and Chenhan Yuan and Kailai Yang and Zheheng Luo and
               Tianlin Zhang and Zhiwei Liu and Guojun Xiong and Zhiyang Deng and
               Yuechen Jiang and Zhiyuan Yao and Haohang Li and Yangyang Yu and
               Gang Hu and Jiajia Huang and Xiao{-}Yang Liu and
               Alejandro Lopez{-}Lira and Benyou Wang and Yanzhao Lai and
               Hao Wang and Min Peng and Sophia Ananiadou and Jimin Huang},
  title     = {{FinBen}: {A} Holistic Financial Benchmark for Large Language Models},
  editor    = {Amir Globerson and Lester Mackey and Danielle Belgrave and
               Angela Fan and Ulrich Paquet and Jakub M. Tomczak and Cheng Zhang},
  booktitle = {Advances in Neural Information Processing Systems 37: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024},
  year      = {2024},
  url       = {http://papers.nips.cc/paper\_files/paper/2024/hash/adb1d9fa8be4576d28703b396b82ba1b-Abstract-Datasets\_and\_Benchmarks\_Track.html},
} -
Harmonic: Harnessing LLMs for Tabular Data Synthesis and Privacy Protection
@inproceedings{DBLP:conf/nips/WangFDCHAXW24,
  author    = {Yuxin Wang and Duanyu Feng and Yongfu Dai and Zhengyu Chen and
               Jimin Huang and Sophia Ananiadou and Qianqian Xie and Hao Wang},
  title     = {{HARMONIC:} Harnessing {LLMs} for Tabular Data Synthesis and Privacy Protection},
  editor    = {Amir Globerson and Lester Mackey and Danielle Belgrave and
               Angela Fan and Ulrich Paquet and Jakub M. Tomczak and Cheng Zhang},
  booktitle = {Advances in Neural Information Processing Systems 37: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024},
  year      = {2024},
  url       = {http://papers.nips.cc/paper\_files/paper/2024/hash/b5aebe9a48398525a9da27a1df827d60-Abstract-Datasets\_and\_Benchmarks\_Track.html},
} -
Dólares or Dollars? Unraveling the Bilingual Prowess of Financial LLMs Between Spanish and English
@inproceedings{DBLP:conf/kdd/ZhangXYFHLLQA0H24,
  author    = {Xiao Zhang and Ruoyu Xiang and Chenhan Yuan and Duanyu Feng and
               Weiguang Han and Alejandro Lopez{-}Lira and Xiao{-}Yang Liu and
               Meikang Qiu and Sophia Ananiadou and Min Peng and Jimin Huang and
               Qianqian Xie},
  title     = {D{\'{o}}lares or Dollars? Unraveling the Bilingual Prowess of Financial {LLMs} Between {Spanish} and {English}},
  editor    = {Ricardo Baeza{-}Yates and Francesco Bonchi},
  booktitle = {Proceedings of the 30th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining, {KDD} 2024, Barcelona, Spain, August 25-29, 2024},
  pages     = {6236--6246},
  publisher = {{ACM}},
  year      = {2024},
  doi       = {10.1145/3637528.3671554},
  url       = {https://doi.org/10.1145/3637528.3671554},
} -
Back to the Future: Towards Explainable Temporal Reasoning with Large Language Models
@inproceedings{DBLP:conf/www/YuanXHA24,
  author    = {Chenhan Yuan and Qianqian Xie and Jimin Huang and Sophia Ananiadou},
  title     = {Back to the Future: Towards Explainable Temporal Reasoning with Large Language Models},
  editor    = {Tat{-}Seng Chua and Chong{-}Wah Ngo and Ravi Kumar and
               Hady W. Lauw and Roy Ka{-}Wei Lee},
  booktitle = {Proceedings of the {ACM} on Web Conference 2024, {WWW} 2024, Singapore, May 13-17, 2024},
  pages     = {1963--1974},
  publisher = {{ACM}},
  year      = {2024},
  doi       = {10.1145/3589334.3645376},
  url       = {https://doi.org/10.1145/3589334.3645376},
} -
AuditWen: An Open-Source Large Language Model for Audit
@inproceedings{DBLP:conf/cncl/HuangZXZXH24,
  author    = {Jiajia Huang and Haoran Zhu and Chao Xu and Tianming Zhan and
               Qianqian Xie and Jimin Huang},
  title     = {{AuditWen}: An Open-Source Large Language Model for Audit},
  editor    = {Maosong Sun and Jiye Liang and Xianpei Han and Zhiyuan Liu and
               Yulan He and Gaoqi Rao and Yubo Chen and Zhiliang Tian},
  booktitle = {Chinese Computational Linguistics - 23rd China National Conference, {CCL} 2024, Taiyuan, China, July 25-28, 2024, Proceedings},
  series    = {Lecture Notes in Computer Science},
  pages     = {505--521},
  publisher = {Springer},
  year      = {2024},
  doi       = {10.1007/978-981-97-8367-0_30},
  url       = {https://doi.org/10.1007/978-981-97-8367-0\_30},
} -
FinNLP-AgentScen-2024 Shared Task: Financial Challenges in Large Language Models
@inproceedings{thefin_finnlp24,
  author    = {Q. Xie and J. Huang and D. Li and Z. Chen and R. Xiang and M. Xiao and
               Y. Yu and V. Somasundaram and others},
  title     = {{FinNLP-AgentScen-2024} Shared Task: Financial Challenges in Large Language Models},
  booktitle = {FinNLP Workshop 2024},
  year      = {2024},
} -
Open-FinLLMs: Open Multimodal Large Language Models for Financial Applications
Model FinLLaMA-instruct 0 ↓ · 9 ♡
Model FinLLaMA 0 ↓ · 5 ♡
Model FinLLaVA 14 ↓ · 24 ♡
Space FinLLaVA
@article{DBLP:journals/corr/abs-2408-11878,
  author     = {Qianqian Xie and Dong Li and Mengxi Xiao and Zihao Jiang and
                Ruoyu Xiang and Xiao Zhang and Zhengyu Chen and Yueru He and
                Weiguang Han and Yuzhe Yang and Shunian Chen and Yifei Zhang and
                Lihang Shen and Daniel S. Kim and Zhiwei Liu and Zheheng Luo and
                Yangyang Yu and Yupeng Cao and Zhiyang Deng and Zhiyuan Yao and
                Haohang Li and Duanyu Feng and Yongfu Dai and
                VijayaSai Somasundaram and Peng Lu and Yilun Zhao and Yitao Long and
                Guojun Xiong and Kaleb E. Smith and Honghai Yu and Yanzhao Lai and
                Min Peng and Jianyun Nie and Jordan W. Suchow and
                Xiao{-}Yang Liu and Benyou Wang and Alejandro Lopez{-}Lira and
                Jimin Huang and Sophia Ananiadou},
  title      = {{Open-FinLLMs}: Open Multimodal Large Language Models for Financial Applications},
  journal    = {CoRR},
  volume     = {abs/2408.11878},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2408.11878},
  doi        = {10.48550/ARXIV.2408.11878},
  url        = {https://doi.org/10.48550/arXiv.2408.11878},
} -
A Report on Financial Regulations Challenge at COLING 2025
@misc{thefin_regchallenge,
  author        = {K. Wang and J. Patel and C. Shen and D. Kim and A. Zhu and A. Lin and
                   L. Borella and C. Osborne and others},
  title         = {A Report on Financial Regulations Challenge at {COLING} 2025},
  year          = {2024},
  eprint        = {2412.11159},
  archivePrefix = {arXiv},
} -
MetaAligner: Conditional Weak-to-Strong Correction for Generalizable Multi-Objective Alignment of Language Models
@article{DBLP:journals/corr/abs-2403-17141,
  author     = {Kailai Yang and Zhiwei Liu and Qianqian Xie and Tianlin Zhang and
                Nirui Song and Jimin Huang and Ziyan Kuang and Sophia Ananiadou},
  title      = {{MetaAligner}: Conditional Weak-to-Strong Correction for Generalizable Multi-Objective Alignment of Language Models},
  journal    = {CoRR},
  volume     = {abs/2403.17141},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2403.17141},
  doi        = {10.48550/ARXIV.2403.17141},
  url        = {https://doi.org/10.48550/arXiv.2403.17141},
} -
No Language Is an Island: Unifying Chinese and English in Financial Large Language Models, Instruction Data, and Benchmarks
@article{DBLP:journals/corr/abs-2403-06249,
  author     = {Gang Hu and Ke Qin and Chenhan Yuan and Min Peng and
                Alejandro Lopez{-}Lira and Benyou Wang and Sophia Ananiadou and
                Wanlong Yu and Jimin Huang and Qianqian Xie},
  title      = {No Language is an Island: Unifying {Chinese} and {English} in Financial Large Language Models, Instruction Data, and Benchmarks},
  journal    = {CoRR},
  volume     = {abs/2403.06249},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2403.06249},
  doi        = {10.48550/ARXIV.2403.06249},
  url        = {https://doi.org/10.48550/arXiv.2403.06249},
}
2023
-
PIXIU: A Comprehensive Benchmark, Instruction Dataset and Large Language Model for Finance
Model finma-7b-full 40 ↓ · 17 ♡
Model finma-7b-nlp 1.1k ↓ · 7 ♡
Datasets FLARE benchmark tasks (×64) 4.6k ↓
@inproceedings{DBLP:conf/nips/XieHZLPLH23,
  author    = {Qianqian Xie and Weiguang Han and Xiao Zhang and Yanzhao Lai and
               Min Peng and Alejandro Lopez{-}Lira and Jimin Huang},
  title     = {{PIXIU:} {A} Comprehensive Benchmark, Instruction Dataset and Large Language Model for Finance},
  editor    = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and
               Moritz Hardt and Sergey Levine},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year      = {2023},
  url       = {http://papers.nips.cc/paper\_files/paper/2023/hash/6a386d703b50f1cf1f61ab02a15967bb-Abstract-Datasets\_and\_Benchmarks.html},
} -
Select and Trade: Towards Unified Pair Trading with Hierarchical Reinforcement Learning
@inproceedings{DBLP:conf/kdd/HanZXPLH23,
  author    = {Weiguang Han and Boyi Zhang and Qianqian Xie and Min Peng and
               Yanzhao Lai and Jimin Huang},
  title     = {Select and Trade: Towards Unified Pair Trading with Hierarchical Reinforcement Learning},
  editor    = {Ambuj K. Singh and Yizhou Sun and Leman Akoglu and
               Dimitrios Gunopulos and Xifeng Yan and Ravi Kumar and
               Fatma Ozcan and Jieping Ye},
  booktitle = {Proceedings of the 29th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining, {KDD} 2023, Long Beach, CA, USA, August 6-10, 2023},
  pages     = {4123--4134},
  publisher = {{ACM}},
  year      = {2023},
  doi       = {10.1145/3580305.3599951},
  url       = {https://doi.org/10.1145/3580305.3599951},
} -
Empowering Many, Biasing a Few: Generalist Credit Scoring through Large Language Models
Dataset lendingclub-benchmark 51 ↓ · 1 ♡
@article{DBLP:journals/corr/abs-2310-00566,
  author     = {Duanyu Feng and Yongfu Dai and Jimin Huang and Yifang Zhang and
                Qianqian Xie and Weiguang Han and Alejandro Lopez{-}Lira and
                Hao Wang},
  title      = {Empowering Many, Biasing a Few: Generalist Credit Scoring through Large Language Models},
  journal    = {CoRR},
  volume     = {abs/2310.00566},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2310.00566},
  doi        = {10.48550/ARXIV.2310.00566},
  url        = {https://doi.org/10.48550/arXiv.2310.00566},
} -
The Wall Street Neophyte: A Zero-Shot Analysis of ChatGPT over Multimodal Stock Movement Prediction Challenges
@article{DBLP:journals/corr/abs-2304-05351,
  author     = {Qianqian Xie and Weiguang Han and Yanzhao Lai and Min Peng and
                Jimin Huang},
  title      = {The {Wall Street} Neophyte: {A} Zero-Shot Analysis of {ChatGPT} Over {MultiModal} Stock Movement Prediction Challenges},
  journal    = {CoRR},
  volume     = {abs/2304.05351},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2304.05351},
  doi        = {10.48550/ARXIV.2304.05351},
  url        = {https://doi.org/10.48550/arXiv.2304.05351},
}
No publications match your search.
The Fin AI