GitHub - bytedance/SALMONN: SALMONN family: A suite of advanced multi-modal LLMs
The SALMONN model family consists of a series of advanced multi-modal large language models. For more details, please refer to the corresponding branches.
% video-SALMONN-o1 (ICML, 2025).
% Author names are joined with " and ", the only separator BibTeX recognizes
% between names; a comma-separated list is parsed as one malformed name.
@inproceedings{sun2025videosalmonno1,
  title     = {{video-SALMONN-o1}: Reasoning-enhanced Audio-visual Large Language Model},
  author    = {Guangzhi Sun and Yudong Yang and Jimin Zhuang and Changli Tang and Yixuan Li and Wei Li and Zejun MA and Chao Zhang},
  booktitle = {ICML},
  year      = {2025}
}
% video-SALMONN 2 (arXiv preprint 2506.15220, 2025).
% Only the model name is brace-protected; the rest of the title is left
% unbraced so the bibliography style can apply its own capitalization.
@article{tang2025video,
  title   = {{video-SALMONN 2}: Captioning-Enhanced Audio-Visual Large Language Models},
  author  = {Changli Tang and Yixuan Li and Yudong Yang and Jimin Zhuang and Guangzhi Sun and Wei Li and Zejun Ma and Chao Zhang},
  journal = {arXiv preprint arXiv:2506.15220},
  year    = {2025},
}
% Auditory LLMs for automatic speech quality evaluation (Proc. ICASSP, 2025).
@inproceedings{wang2024enabling,
  author    = {Wang, Siyin and Yu, Wenyi and Yang, Yudong and Tang, Changli and Li, Yixuan and Zhuang, Jimin and Chen, Xianzhao and Tian, Xiaohai and Zhang, Jun and Sun, Guangzhi and others},
  title     = {Enabling Auditory Large Language Models for Automatic Speech Quality Evaluation},
  booktitle = {Proc. ICASSP},
  address   = {Hyderabad},
  year      = {2025}
}
% QualiSpeech speech quality assessment dataset (Proc. ACL, 2025).
% This entry previously reused the key wang2024enabling, which already
% belongs to the ICASSP entry in this file; BibTeX rejects a repeated key,
% so this entry could not be cited. Cite it as wang2025qualispeech.
% The camelCase name in the title is braced to survive style recasing.
@inproceedings{wang2025qualispeech,
  title     = {{QualiSpeech}: A Speech Quality Assessment Dataset with Natural Language Reasoning and Descriptions},
  author    = {Wang, Siyin and Yu, Wenyi and Chen, Xianzhao and Tian, Xiaohai and Zhang, Jun and Sun, Guangzhi and others},
  booktitle = {Proc. ACL},
  address   = {Vienna},
  year      = {2025}
}
% video-SALMONN (Forty-first International Conference on Machine Learning, 2024).
@inproceedings{sun2024videosalmonn,
  author    = {Guangzhi Sun and Wenyi Yu and Changli Tang and Xianzhao Chen and Tian Tan and Wei Li and Lu Lu and Zejun MA and Yuxuan Wang and Chao Zhang},
  title     = {video-{SALMONN}: Speech-Enhanced Audio-Visual Large Language Models},
  booktitle = {Forty-first International Conference on Machine Learning},
  year      = {2024},
  url       = {https://openreview.net/forum?id=nYsh5GFIqX}
}
% SALMONN (The Twelfth International Conference on Learning Representations, 2024).
% The acronym in the title is brace-protected so that styles which
% sentence-case titles do not lowercase it.
@inproceedings{tang2024salmonn,
  title     = {{SALMONN}: Towards Generic Hearing Abilities for Large Language Models},
  author    = {Changli Tang and Wenyi Yu and Guangzhi Sun and Xianzhao Chen and Tian Tan and Wei Li and Lu Lu and Zejun MA and Chao Zhang},
  booktitle = {The Twelfth International Conference on Learning Representations},
  year      = {2024},
  url       = {https://openreview.net/forum?id=14rn7HpKVk}
}