{"id":1139775,"date":"2025-05-20T01:41:13","date_gmt":"2025-05-20T08:41:13","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-research-item&#038;p=1139775"},"modified":"2025-10-31T13:41:56","modified_gmt":"2025-10-31T20:41:56","slug":"llava-rad-mimic-cxr-annotations","status":"publish","type":"msr-research-item","link":"https:\/\/www.microsoft.com\/en-us\/research\/publication\/llava-rad-mimic-cxr-annotations\/","title":{"rendered":"LLaVA-Rad MIMIC-CXR Annotations"},"content":{"rendered":"<p>LLaVA-Rad MIMIC-CXR features more accurate section extractions from MIMIC-CXR free-text radiology reports. Traditionally, rule-based methods were used to extract sections such as the reason for exam, findings, and impression. However, these approaches often fail due to inconsistencies in report structure and clinical language. In this work, we leverage GPT-4 to extract these sections more reliably, adding 237,073 image-text pairs to the training split and 1,952 pairs to the validation split. This enhancement afforded the development and fine-tuning of LLaVA-Rad, a multimodal large language model (LLM) tailored for radiology applications, achieving improved performance on report generation tasks. This resource is provided to support reproducibility and for the benefit of the research community, enabling further exploration in vision\u2013language modeling. For more details, please refer to the accompanying paper.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>LLaVA-Rad MIMIC-CXR features more accurate section extractions from MIMIC-CXR free-text radiology reports. Traditionally, rule-based methods were used to extract sections such as the reason for exam, findings, and impression. However, these approaches often fail due to inconsistencies in report structure and clinical language. In this work, we leverage GPT-4 to extract these sections more reliably, [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"","msr_publisher_other":"","msr_booktitle":"","msr_chapter":"","msr_edition":"","msr_editors":"","msr_how_published":"PhysioNet","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"","msr_pages_string":"","msr_page_range_start":"","msr_page_range_end":"","msr_series":"","msr_volume":"","msr_copyright":"","msr_conference_name":"","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"","msr_pubmed_id":"","msr_other_authors":"","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":"","msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2025-1-24","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":0,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":true,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":null,"footnotes":""},"msr-research-highlight":[],"research-area":[13556,13562,13553],"msr-publication-type":[193724],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[269148,269142],"msr-field-of-study":[268632],"msr-conference":[],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-1139775","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-artificial-intelligence","msr-research-area-computer-vision","msr-research-area-medical-health-genomics","msr-locale-en_us","msr-post-option-approved-for-river","msr-post-option-include-in-river","msr-field-of-study-multimodal-large-language-models"],"msr_publishername":"","msr_edition":"","msr_affiliation":"","msr_published_date":"2025-1-24","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"PhysioNet","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":1,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"url","viewUrl":"false","id":"false","title":"https:\/\/physionet.org\/content\/llava-rad-mimic-cxr-annotation\/1.0.0\/","label_id":"243109","label":0}],"msr_related_uploader":"","msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[],"msr-author-ordering":[{"type":"edited_text","value":"Juan Manuel Zambrano Chaves (juanza)","user_id":43578,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Juan Manuel Zambrano Chaves (juanza)"},{"type":"text","value":"Shih-Cheng Huang","user_id":0,"rest_url":false},{"type":"text","value":"Yanbo Xu","user_id":0,"rest_url":false},{"type":"text","value":"Hanwen Xu","user_id":0,"rest_url":false},{"type":"edited_text","value":"Naoto Usuyama (naotous)","user_id":38670,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Naoto Usuyama (naotous)"},{"type":"user_nicename","value":"Sheng Zhang","user_id":39087,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Sheng Zhang"},{"type":"text","value":"Fei Wang","user_id":0,"rest_url":false},{"type":"guest","value":"yujia-xie","user_id":946596,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=yujia-xie"},{"type":"user_nicename","value":"Mahmoud Khademi","user_id":42297,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Mahmoud Khademi"},{"type":"text","value":"Ziyi Yang","user_id":0,"rest_url":false},{"type":"text","value":"Hany Awadalla","user_id":0,"rest_url":false},{"type":"text","value":"Julia Gong","user_id":0,"rest_url":false},{"type":"guest","value":"houdong-hu","user_id":802588,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=houdong-hu"},{"type":"user_nicename","value":"Jianwei Yang","user_id":40261,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Jianwei Yang"},{"type":"text","value":"Chunyuan Li","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Jianfeng Gao","user_id":32246,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Jianfeng Gao"},{"type":"user_nicename","value":"Aiden Gu","user_id":44020,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Aiden Gu"},{"type":"user_nicename","value":"Cliff Wong","user_id":38508,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Cliff Wong"},{"type":"text","value":"Mu-Hsin Wei","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Tristan Naumann","user_id":37929,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Tristan Naumann"},{"type":"text","value":"Muhao Chen","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Matthew Lungren","user_id":42792,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Matthew Lungren"},{"type":"text","value":"Akshay Chaudhari","user_id":0,"rest_url":false},{"type":"text","value":"Serena Yeung","user_id":0,"rest_url":false},{"type":"text","value":"Curtis Langlotz","user_id":0,"rest_url":false},{"type":"text","value":"Sheng Wang","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Hoifung Poon","user_id":32016,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Hoifung Poon"}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[],"msr_project":[],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"miscellaneous","related_content":[],"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1139775","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":5,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1139775\/revisions"}],"predecessor-version":[{"id":1154364,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1139775\/revisions\/1154364"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=1139775"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=1139775"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=1139775"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=1139775"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=1139775"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=1139775"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=1139775"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=1139775"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=1139775"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=1139775"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=1139775"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=1139775"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=1139775"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}