% Conference paper (International Conference on Computer Vision, 2023).
% The acronym in the title is brace-protected so sentence-casing styles keep it uppercase.
@inproceedings{han2023html,
  author       = {Han, Mingfei and Wang, Yali and Li, Zhihui and Yao, Lina and Chang, Xiaojun and Qiao, Yu},
  title        = {{HTML}: Hybrid Temporal-scale Multimodal Learning Framework for Referring Video Object Segmentation},
  booktitle    = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision},
  year         = {2023},
  organization = {IEEE},
}