{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:21:39Z","timestamp":1772907699000,"version":"3.50.1"},"reference-count":70,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001348","name":"Agency for Science, Technology and Research","doi-asserted-by":"publisher","award":["M23L7b0021"],"award-info":[{"award-number":["M23L7b0021"]}],"id":[{"id":"10.13039\/501100001348","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1016\/j.neucom.2025.130018","type":"journal-article","created":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T03:57:32Z","timestamp":1742270252000},"page":"130018","update-policy":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":3,"special_numbering":"C","title":["Multimodal multitask similarity learning for vision language model on radiological images and reports"],"prefix":"10.1016","volume":"636","author":[{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0003-4700-284X","authenticated-orcid":false,"given":"Yang","family":"Yu","sequence":"first","affiliation":[]},{"given":"Jiahao","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Weide","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Ivan","family":"Ho 
Mien","sequence":"additional","affiliation":[]},{"given":"Pavitra","family":"Krishnaswamy","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0002-7002-4564","authenticated-orcid":false,"given":"Xulei","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0003-1786-6188","authenticated-orcid":false,"given":"Jun","family":"Cheng","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.neucom.2025.130018_b1","article-title":"On the interpretability of artificial intelligence in radiology: challenges and opportunities","volume":"2","author":"Reyes","year":"2020","journal-title":"Radiol.: Artif. Intell."},{"issue":"7956","key":"10.1016\/j.neucom.2025.130018_b2","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1038\/s41586-023-05881-4","article-title":"Foundation models for generalist medical artificial intelligence","volume":"616","author":"Moor","year":"2023","journal-title":"Nature"},{"key":"10.1016\/j.neucom.2025.130018_b3","article-title":"Lvit: language meets vision transformer in medical image segmentation","author":"Li","year":"2023","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.neucom.2025.130018_b4","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"724","article-title":"Ariadne\u2019s thread: Using text prompts to improve segmentation of infected areas from chest X-ray images","author":"Zhong","year":"2023"},{"key":"10.1016\/j.neucom.2025.130018_b5","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"537","article-title":"Text-guided cross-position attention for segmentation: Case of medical image","author":"Lee","year":"2023"},{"key":"10.1016\/j.neucom.2025.130018_b6","article-title":"Improving medical speech-to-text accuracy using vision-language pre-training models","author":"Huh","year":"2023","journal-title":"IEEE J. Biomed. Heal. 
Inform."},{"key":"10.1016\/j.neucom.2025.130018_b7","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1016\/j.neucom.2023.03.011","article-title":"Cross-modal transformer with language query for referring image segmentation","volume":"536","author":"Zhang","year":"2023","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2025.130018_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2023.03.057","article-title":"Efficient text-image semantic search: A multi-modal vision-language approach for fashion retrieval","volume":"538","author":"Moro","year":"2023","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2025.130018_b9","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.127530","article-title":"Cross-modal concept learning and inference for vision-language models","volume":"583","author":"Zhang","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2025.130018_b10","series-title":"Machine Learning for Healthcare Conference","first-page":"2","article-title":"Contrastive learning of medical visual representations from paired images and text","author":"Zhang","year":"2022"},{"key":"10.1016\/j.neucom.2025.130018_b11","unstructured":"Hong-Yu Zhou, Chenyu Lian, Liansheng Wang, Yizhou Yu, Advancing Radiograph Representation Learning with Masked Record Modeling, in: The Eleventh International Conference on Learning Representations, 2022."},{"key":"10.1016\/j.neucom.2025.130018_b12","series-title":"European Conference on Computer Vision","first-page":"1","article-title":"Making the most of text semantics to improve biomedical vision\u2013language processing","author":"Boecking","year":"2022"},{"key":"10.1016\/j.neucom.2025.130018_b13","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"493","article-title":"Contrastive masked image-text modeling for medical visual representation learning","author":"Chen","year":"2023"},{"key":"10.1016\/j.neucom.2025.130018_b14","article-title":"Multi-task paired masking with alignment modeling for medical vision-language pre-training","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Multimed."},{"issue":"12","key":"10.1016\/j.neucom.2025.130018_b15","doi-asserted-by":"crossref","first-page":"1399","DOI":"10.1038\/s41551-022-00936-9","article-title":"Expert-level detection of pathologies from unannotated chest X-ray images via self-supervised learning","volume":"6","author":"Tiu","year":"2022","journal-title":"Nat. Biomed. Eng."},{"issue":"3","key":"10.1016\/j.neucom.2025.130018_b16","doi-asserted-by":"crossref","first-page":"863","DOI":"10.1038\/s41591-024-02856-4","article-title":"A visual-language foundation model for computational pathology","volume":"30","author":"Lu","year":"2024","journal-title":"Nature Med."},{"key":"10.1016\/j.neucom.2025.130018_b17","first-page":"1","article-title":"Vision\u2013language foundation model for echocardiogram interpretation","author":"Christensen","year":"2024","journal-title":"Nature Med."},{"issue":"12","key":"10.1016\/j.neucom.2025.130018_b18","doi-asserted-by":"crossref","first-page":"6070","DOI":"10.1109\/JBHI.2022.3207502","article-title":"Multi-modal understanding and generation for medical images and text via vision-language pre-training","volume":"26","author":"Moon","year":"2022","journal-title":"IEEE J. Biomed. Heal. 
Inform."},{"key":"10.1016\/j.neucom.2025.130018_b19","series-title":"Self-supervised image-text pre-training with mixed data in chest x-rays","author":"Wang","year":"2021"},{"key":"10.1016\/j.neucom.2025.130018_b20","doi-asserted-by":"crossref","unstructured":"Shih-Cheng Huang, Liyue Shen, Matthew P Lungren, Serena Yeung, Gloria: A multimodal global-local representation learning framework for label-efficient medical image recognition, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 3942\u20133951.","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"10.1016\/j.neucom.2025.130018_b21","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"101","article-title":"Cxr-clip: Toward large scale chest x-ray language-image pre-training","author":"You","year":"2023"},{"key":"10.1016\/j.neucom.2025.130018_b22","unstructured":"Yangguang Li, Feng Liang, Lichen Zhao, Yufeng Cui, Wanli Ouyang, Jing Shao, Fengwei Yu, Junjie Yan, Supervision Exists Everywhere: A Data Efficient Contrastive Language-Image Pre-training Paradigm, in: International Conference on Learning Representations, 2021."},{"issue":"1","key":"10.1016\/j.neucom.2025.130018_b23","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1038\/s42256-021-00425-9","article-title":"Generalized radiograph representation learning via cross-supervision between images and free-text radiology reports","volume":"4","author":"Zhou","year":"2022","journal-title":"Nat. Mach. Intell."},{"key":"10.1016\/j.neucom.2025.130018_b24","first-page":"33536","article-title":"Multi-granularity cross-modal alignment for generalized medical visual representation learning","volume":"35","author":"Wang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2025.130018_b25","unstructured":"Chaoyi Wu, Xiaoman Zhang, Ya Zhang, Yanfeng Wang, Weidi Xie, Medklip: Medical knowledge enhanced language-image pre-training for x-ray diagnosis, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023, pp. 21372\u201321383."},{"key":"10.1016\/j.neucom.2025.130018_b26","doi-asserted-by":"crossref","unstructured":"Zifeng Wang, Zhenbang Wu, Dinesh Agarwal, Jimeng Sun, MedCLIP: Contrastive Learning from Unpaired Medical Images and Text, in: 2022 Conference on Empirical Methods in Natural Language Processing, EMNLP 2022, 2022.","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"10.1016\/j.neucom.2025.130018_b27","article-title":"Improving medical vision-language contrastive pretraining with semantics-aware triage","author":"Liu","year":"2023","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.neucom.2025.130018_b28","series-title":"Eye-gaze guided multi-modal alignment framework for radiology","author":"Ma","year":"2024"},{"key":"10.1016\/j.neucom.2025.130018_b29","doi-asserted-by":"crossref","unstructured":"Haoran Lai, Qingsong Yao, Zihang Jiang, Rongsheng Wang, Zhiyang He, Xiaodong Tao, S Kevin Zhou, Carzero: Cross-attention alignment for radiology zero-shot classification, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 
11137\u201311146.","DOI":"10.1109\/CVPR52733.2024.01059"},{"key":"10.1016\/j.neucom.2025.130018_b30","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"issue":"5","key":"10.1016\/j.neucom.2025.130018_b31","doi-asserted-by":"crossref","first-page":"1532","DOI":"10.1109\/TMI.2022.3232411","article-title":"Medical visual question answering via conditional reasoning and contrastive learning","volume":"42","author":"Liu","year":"2022","journal-title":"IEEE Trans. Med. Imaging"},{"issue":"11","key":"10.1016\/j.neucom.2025.130018_b32","doi-asserted-by":"crossref","first-page":"5585","DOI":"10.1109\/TIP.2018.2852503","article-title":"Modality-specific cross-modal similarity measurement with recurrent attention network","volume":"27","author":"Peng","year":"2018","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.neucom.2025.130018_b33","doi-asserted-by":"crossref","first-page":"298","DOI":"10.1016\/j.ins.2020.08.009","article-title":"Drsl: Deep relational similarity learning for cross-modal retrieval","volume":"546","author":"Wang","year":"2021","journal-title":"Inform. Sci."},{"key":"10.1016\/j.neucom.2025.130018_b34","doi-asserted-by":"crossref","unstructured":"Peng Hu, Liangli Zhen, Dezhong Peng, Pei Liu, Scalable deep multimodal learning for cross-modal retrieval, in: Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval, 2019, pp. 635\u2013644.","DOI":"10.1145\/3331184.3331213"},{"key":"10.1016\/j.neucom.2025.130018_b35","doi-asserted-by":"crossref","unstructured":"Liangli Zhen, Peng Hu, Xu Wang, Dezhong Peng, Deep supervised cross-modal retrieval, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 10394\u201310403.","DOI":"10.1109\/CVPR.2019.01064"},{"issue":"2","key":"10.1016\/j.neucom.2025.130018_b36","doi-asserted-by":"crossref","first-page":"798","DOI":"10.1109\/TNNLS.2020.3029181","article-title":"Deep multimodal transfer learning for cross-modal retrieval","volume":"33","author":"Zhen","year":"2020","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.neucom.2025.130018_b37","doi-asserted-by":"crossref","unstructured":"Flood Sung, Yongxin Yang, Li Zhang, Tao Xiang, Philip HS Torr, Timothy M Hospedales, Learning to compare: Relation network for few-shot learning, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 1199\u20131208.","DOI":"10.1109\/CVPR.2018.00131"},{"key":"10.1016\/j.neucom.2025.130018_b38","series-title":"2019 IEEE International Conference on Multimedia & Expo Workshops","first-page":"198","article-title":"Self-attention relation network for few-shot learning","author":"Hui","year":"2019"},{"key":"10.1016\/j.neucom.2025.130018_b39","first-page":"35959","article-title":"Pyramidclip: Hierarchical feature alignment for vision-language model pretraining","volume":"35","author":"Gao","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2025.130018_b40","doi-asserted-by":"crossref","unstructured":"Yuting Gao, Jinfeng Liu, Zihan Xu, Tong Wu, Enwei Zhang, Ke Li, Jie Yang, Wei Liu, Xing Sun, Softclip: Softer cross-modal alignment makes clip stronger, in: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, 2024, pp. 
1860\u20131868, 3.","DOI":"10.1609\/aaai.v38i3.27955"},{"key":"10.1016\/j.neucom.2025.130018_b41","unstructured":"Liu Yiyang, Liang James Chenhao, Tang Ruixiang, Lee Yugyung, RABBANI MAJID, Dianat Sohail, Rao Raghuveer, Huang Lifu, Liu Dongfang, Wang Qifan, Han Cheng, Re-Imagining Multimodal Instruction Tuning: A Representation View, in: Proceedings of the International Conference on Learning Representations, ICLR, 2025."},{"key":"10.1016\/j.neucom.2025.130018_b42","series-title":"RadVLM: A multitask conversational vision-language model for radiology","author":"Deperrois","year":"2025"},{"key":"10.1016\/j.neucom.2025.130018_b43","series-title":"RadAlign: Advancing radiology report generation with vision-language concept alignment","author":"Gu","year":"2025"},{"issue":"2","key":"10.1016\/j.neucom.2025.130018_b44","doi-asserted-by":"crossref","first-page":"638","DOI":"10.1109\/TMI.2018.2868977","article-title":"Learning cross-modality representations from multi-modal images","volume":"38","author":"van Tulder","year":"2018","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.neucom.2025.130018_b45","doi-asserted-by":"crossref","unstructured":"Taowen Wang, Yiyang Liu, James Liang, Junhan Zhao, Yiming Cui, Yuning Mao, Shaoliang Nie, Jiahao Liu, Fuli Feng, Zenglin Xu, et al., M2PT: Multimodal Prompt Tuning for Zero-shot Instruction Learning, in: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, 2024, pp. 3723\u20133740.","DOI":"10.18653\/v1\/2024.emnlp-main.218"},{"key":"10.1016\/j.neucom.2025.130018_b46","series-title":"2023 IEEE\/CVF International Conference on Computer Vision","first-page":"17445","article-title":"E 2 VPT: An effective and efficient approach for visual prompt tuning","author":"Han","year":"2023"},{"key":"10.1016\/j.neucom.2025.130018_b47","doi-asserted-by":"crossref","unstructured":"Runjia Zeng, Cheng Han, Qifan Wang, Chunshu Wu, Tong Geng, Lifu Huang, Ying Nian Wu, Dongfang Liu, Visual Fourier Prompt Tuning, in: The Thirty-Eighth Annual Conference on Neural Information Processing Systems, 2024.","DOI":"10.52202\/079017-0180"},{"key":"10.1016\/j.neucom.2025.130018_b48","series-title":"2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"14196","article-title":"Efficient multimodal semantic segmentation via dual-prompt learning","author":"Dong","year":"2024"},{"key":"10.1016\/j.neucom.2025.130018_b49","unstructured":"Cheng Han, Qifan Wang, Yiming Cui, Wenguan Wang, Lifu Huang, Siyuan Qi, Dongfang Liu, Facing the Elephant in the Room: Visual Prompt Tuning or Full finetuning?, in: The Twelfth International Conference on Learning Representations, 2024."},{"key":"10.1016\/j.neucom.2025.130018_b50","series-title":"Visual agents as fast and slow thinkers","author":"Sun","year":"2024"},{"issue":"1","key":"10.1016\/j.neucom.2025.130018_b51","doi-asserted-by":"crossref","first-page":"12","DOI":"10.4103\/2228-7477.83460","article-title":"CBMIR: Content-based image retrieval algorithm for medical image databases","volume":"1","author":"Pilevar","year":"2011","journal-title":"J. Med. 
Signals Sens."},{"key":"10.1016\/j.neucom.2025.130018_b52","doi-asserted-by":"crossref","first-page":"8","DOI":"10.1016\/j.neucom.2017.05.025","article-title":"Medical image retrieval using deep convolutional neural network","volume":"266","author":"Qayyum","year":"2017","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2025.130018_b53","series-title":"Machine Learning for Health","first-page":"209","article-title":"Retrieval-based chest x-ray report generation using a pre-trained contrastive language-image model","author":"Endo","year":"2021"},{"key":"10.1016\/j.neucom.2025.130018_b54","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2021: 24th International Conference, Strasbourg, France, September 27\u2013October 1, 2021, Proceedings, Part V 24","first-page":"603","article-title":"Multimodal multitask deep learning for X-ray image retrieval","author":"Yu","year":"2021"},{"key":"10.1016\/j.neucom.2025.130018_b55","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2021.107673","article-title":"Category supervised cross-modal hashing retrieval for chest x-ray and radiology reports","volume":"98","author":"Zhang","year":"2022","journal-title":"Comput. Electr. Eng."},{"key":"10.1016\/j.neucom.2025.130018_b56","series-title":"International Conference on Information Processing in Medical Imaging","first-page":"471","article-title":"X-tra: Improving chest x-ray tasks with cross-modal retrieval augmentation","author":"van Sonsbeek","year":"2023"},{"issue":"10","key":"10.1016\/j.neucom.2025.130018_b57","doi-asserted-by":"crossref","first-page":"2642","DOI":"10.1109\/TMI.2021.3054817","article-title":"Generalized zero-shot chest x-ray diagnosis through trait-guided multi-view semantic embedding with self-training","volume":"40","author":"Paul","year":"2021","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.neucom.2025.130018_b58","doi-asserted-by":"crossref","unstructured":"Dwarikanath Mahapatra, Behzad Bozorgtabar, Zongyuan Ge, Medical image classification using generalized zero shot learning, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 3344\u20133353.","DOI":"10.1109\/ICCVW54120.2021.00373"},{"key":"10.1016\/j.neucom.2025.130018_b59","doi-asserted-by":"crossref","unstructured":"Devraj Mandal, Sanath Narayan, Sai Kumar Dwivedi, Vikram Gupta, Shuaib Ahmed, Fahad Shahbaz Khan, Ling Shao, Out-of-distribution detection for generalized zero-shot action recognition, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 9985\u20139993.","DOI":"10.1109\/CVPR.2019.01022"},{"key":"10.1016\/j.neucom.2025.130018_b60","doi-asserted-by":"crossref","unstructured":"Akanksha Paul, Narayanan C. Krishnan, Prateek Munjal, Semantically aligned bias reducing zero shot learning, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 7056\u20137065.","DOI":"10.1109\/CVPR.2019.00722"},{"issue":"1","key":"10.1016\/j.neucom.2025.130018_b61","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","article-title":"MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports","volume":"6","author":"Johnson","year":"2019","journal-title":"Sci. 
Data"},{"key":"10.1016\/j.neucom.2025.130018_b62","doi-asserted-by":"crossref","unstructured":"Jeremy Irvin, Pranav Rajpurkar, Michael Ko, Yifan Yu, Silviana Ciurea-Ilcus, Chris Chute, Henrik Marklund, Behzad Haghgoo, Robyn Ball, Katie Shpanskaya, et al., Chexpert: A large chest radiograph dataset with uncertainty labels and expert comparison, in: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, 2019, pp. 590\u2013597, 01.","DOI":"10.1609\/aaai.v33i01.3301590"},{"issue":"1","key":"10.1016\/j.neucom.2025.130018_b63","article-title":"Augmenting the national institutes of health chest radiograph dataset with expert annotations of possible pneumonia","volume":"1","author":"Shih","year":"2019","journal-title":"Radiol.: Artif. Intell."},{"key":"10.1016\/j.neucom.2025.130018_b64","doi-asserted-by":"crossref","unstructured":"Gao Huang, Zhuang Liu, Laurens Van Der Maaten, Kilian Q Weinberger, Densely connected convolutional networks, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 4700\u20134708.","DOI":"10.1109\/CVPR.2017.243"},{"key":"10.1016\/j.neucom.2025.130018_b65","doi-asserted-by":"crossref","unstructured":"Akshay Smit, Saahil Jain, Pranav Rajpurkar, Anuj Pareek, Andrew Y Ng, Matthew Lungren, Combining Automatic Labelers and Expert Annotations for Accurate Radiology Report Labeling Using BERT, in: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP, 2020, pp. 1500\u20131519.","DOI":"10.18653\/v1\/2020.emnlp-main.117"},{"key":"10.1016\/j.neucom.2025.130018_b66","doi-asserted-by":"crossref","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun, Deep residual learning for image recognition, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10.1016\/j.neucom.2025.130018_b67","doi-asserted-by":"crossref","unstructured":"Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo, Swin transformer: Hierarchical vision transformer using shifted windows, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"10.1016\/j.neucom.2025.130018_b68","doi-asserted-by":"crossref","unstructured":"Emily Alsentzer, John Murphy, William Boag, Wei-Hung Weng, Di Jindi, Tristan Naumann, Matthew McDermott, Publicly Available Clinical BERT Embeddings, in: Proceedings of the 2nd Clinical Natural Language Processing Workshop, 2019, pp. 72\u201378.","DOI":"10.18653\/v1\/W19-1909"},{"key":"10.1016\/j.neucom.2025.130018_b69","series-title":"Proceedings of the Conference. Association for Computational Linguistics. North American Chapter. Meeting","first-page":"4533","article-title":"Leveraging deep representations of radiology reports in survival analysis for predicting heart failure patient mortality","author":"Lee","year":"2021"},{"issue":"11","key":"10.1016\/j.neucom.2025.130018_b70","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"J. Mach. Learn. 
Res."}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/api.elsevier.com\/content\/article\/PII:S0925231225006903?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/api.elsevier.com\/content\/article\/PII:S0925231225006903?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T03:24:22Z","timestamp":1769311462000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231225006903"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":70,"alternative-id":["S0925231225006903"],"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1016\/j.neucom.2025.130018","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2025,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Multimodal multitask similarity learning for vision language model on radiological images and reports","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1016\/j.neucom.2025.130018","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"130018"}}