{"id":733513,"date":"2021-04-20T21:08:04","date_gmt":"2021-04-21T04:08:04","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-project&#038;p=733513"},"modified":"2022-07-12T23:43:28","modified_gmt":"2022-07-13T06:43:28","slug":"ocr-and-document-understanding","status":"publish","type":"msr-project","link":"https:\/\/www.microsoft.com\/en-us\/research\/project\/ocr-and-document-understanding\/","title":{"rendered":"OCR and Document Understanding"},"content":{"rendered":"<p class=\"mr-2\">We have been developing SOTA technologies and industry-leading product solutions for following scenarios: (1) Universal OCR to detect and recognize any text in image\/PDF; (2) Universal math OCR to detect and recognize any math expression in image\/PDF; (3) Universal table understanding to detect, recognize, and understand any tables in image\/PDF; (4) Universal layout analysis to detect page objects such as text blocks, lists, tables, math equations, figures, etc. in any image\/PDF, identify their relationships, and determine the reading order of body text; (5) Universal information extraction to extract entities, key\/value pairs, item lists and other intended information from any image\/PDF document; (6) Synthetic data generation for the above scenarios to reduce cost, improve accuracy, and increase the speed of innovation.<\/p>\n<p>Related links:<\/p>\n<div class=\"row project-media\">\n<ul>\n<li class=\"col-12 team-resource\"><a class=\"basic\" href=\"https:\/\/aka.ms\/ocr-docs\" target=\"_blank\" rel=\"noopener\">OCR<\/a><\/li>\n<li class=\"col-12 team-resource\"><a class=\"basic\" href=\"https:\/\/aka.ms\/form-recognizer\/docs\" target=\"_blank\" rel=\"noopener\">Form Recognizer<\/a><\/li>\n<li class=\"col-12 team-resource\"><a class=\"basic\" href=\"https:\/\/math.microsoft.com\/en\" target=\"_blank\" rel=\"noopener\">Microsoft Math Solver<\/a><\/li>\n<\/ul>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>We have been developing SOTA technologies and industry-leading product solutions for following scenarios: (1) Universal OCR to detect and recognize any text in image\/PDF; (2) Universal math OCR to detect and recognize any math expression in image\/PDF; (3) Universal table understanding to detect, recognize, and understand any tables in image\/PDF; (4) Universal layout analysis to [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","footnotes":""},"research-area":[13551],"msr-locale":[268875],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-733513","msr-project","type-msr-project","status-publish","hentry","msr-research-area-graphics-and-multimedia","msr-locale-en_us","msr-archive-status-active"],"msr_project_start":"","related-publications":[],"related-downloads":[],"related-videos":[],"related-groups":[144778],"related-events":[],"related-opportunities":[],"related-posts":[],"related-articles":[],"tab-content":[],"slides":[],"related-researchers":[{"type":"user_nicename","display_name":"Qiang Huo","user_id":33297,"people_section":"Section name 0","alias":"qianghuo"}],"msr_research_lab":[199560],"msr_impact_theme":[],"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/733513","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-project"}],"version-history":[{"count":3,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/733513\/revisions"}],"predecessor-version":[{"id":859134,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/733513\/revisions\/859134"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=733513"}],"wp:term":[{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=733513"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=733513"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=733513"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=733513"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}