{"id":171454,"date":"2015-04-09T09:50:39","date_gmt":"2015-04-09T09:50:39","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/project\/from-captions-to-visual-concepts-and-back\/"},"modified":"2019-08-19T10:38:03","modified_gmt":"2019-08-19T17:38:03","slug":"from-captions-to-visual-concepts-and-back","status":"publish","type":"msr-project","link":"https:\/\/www.microsoft.com\/en-us\/research\/project\/from-captions-to-visual-concepts-and-back\/","title":{"rendered":"From Captions to Visual Concepts and Back"},"content":{"rendered":"<p><img loading=\"lazy\" decoding=\"async\" class=\"alignleft size-medium wp-image-213084\" src=\"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2015\/04\/teaser-258x300.jpg\" alt=\"teaser\" width=\"258\" height=\"300\" srcset=\"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2015\/04\/teaser-258x300.jpg 258w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2015\/04\/teaser.jpg 341w\" sizes=\"auto, (max-width: 258px) 100vw, 258px\" \/>We introduce a novel approach for automatically generating image descriptions. Visual detectors, language models, and deep multimodal similarity models are learned directly from a dataset of image captions. Our system is state-of-the-art on the official Microsoft COCO benchmark, producing a BLEU-4 score of 29.1%. Human judges consider the captions to be as good as or better than humans 34% of the time.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>We introduce a novel approach for automatically generating image descriptions. Visual detectors, language models, and deep multimodal similarity models are learned directly from a dataset of image captions. Our system is state-of-the-art on the official Microsoft COCO benchmark, producing a BLEU-4 score of 29.1%. Human judges consider the captions to be as good as or [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","footnotes":""},"research-area":[13562,13545],"msr-locale":[268875],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-171454","msr-project","type-msr-project","status-publish","hentry","msr-research-area-computer-vision","msr-research-area-human-language-technologies","msr-locale-en_us","msr-archive-status-active"],"msr_project_start":"4\/9\/2015","related-publications":[168020],"related-downloads":[],"related-videos":[],"related-groups":[],"related-events":[],"related-opportunities":[],"related-posts":[],"related-articles":[],"tab-content":[{"id":0,"name":"Interns","content":"<ul>\r\n \t<li><a href=\"http:\/\/students.washington.edu\/hfang\/#Hao%20Fang\" target=\"_blank\" rel=\"noopener\">Hao Fang<\/a><\/li>\r\n \t<li><a href=\"http:\/\/www.cs.berkeley.edu\/~sgupta\/\" target=\"_blank\" rel=\"noopener\">Saurabh Gupta<\/a><\/li>\r\n \t<li><a href=\"http:\/\/www.forrestiandola.com\/\" target=\"_blank\" rel=\"noopener\">Forrest Iandola<\/a><\/li>\r\n \t<li><a href=\"http:\/\/people.idsia.ch\/~rupesh\/\" target=\"_blank\" rel=\"noopener\">Rupesh Srivastava<\/a><\/li>\r\n<\/ul>"}],"slides":[],"related-researchers":[{"type":"user_nicename","display_name":"Jianfeng Gao","user_id":32246,"people_section":"Group 1","alias":"jfgao"}],"msr_research_lab":[],"msr_impact_theme":[],"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/171454","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-project"}],"version-history":[{"count":4,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/171454\/revisions"}],"predecessor-version":[{"id":604233,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/171454\/revisions\/604233"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=171454"}],"wp:term":[{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=171454"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=171454"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=171454"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=171454"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}