{"id":1145968,"date":"2025-10-22T09:53:34","date_gmt":"2025-10-22T16:53:34","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/"},"modified":"2025-12-05T03:59:00","modified_gmt":"2025-12-05T11:59:00","slug":"efficient-ai","status":"publish","type":"msr-group","link":"https:\/\/www.microsoft.com\/en-us\/research\/group\/efficient-ai\/","title":{"rendered":"Efficient AI"},"content":{"rendered":"<section class=\"mb-3 moray-highlight\">\n\t<div class=\"card-img-overlay mx-lg-0\">\n\t\t<div class=\"card-background  has-background- card-background--full-bleed\">\n\t\t\t<img loading=\"lazy\" decoding=\"async\" width=\"2560\" height=\"1463\" src=\"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-scaled.jpg\" class=\"attachment-full size-full\" alt=\"AID landing page banner\" style=\"\" srcset=\"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-scaled.jpg 2560w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-300x171.jpg 300w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-1024x585.jpg 1024w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-768x439.jpg 768w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-1536x878.jpg 1536w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-2048x1170.jpg 2048w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2024\/07\/M365-Research-Page-Banner-240x137.jpg 240w\" sizes=\"auto, (max-width: 2560px) 100vw, 2560px\" \/>\t\t<\/div>\n\t\t<!-- Foreground -->\n\t\t<div class=\"card-foreground d-flex mt-md-n5 my-lg-5 px-g px-lg-0\">\n\t\t\t<!-- Container -->\n\t\t\t<div class=\"container d-flex mt-md-n5 my-lg-5 \">\n\t\t\t\t<!-- Card wrapper 
-->\n\t\t\t\t<div class=\"w-100 w-lg-col-5\">\n\t\t\t\t\t<!-- Card -->\n\t\t\t\t\t<div class=\"card material-md-card py-5 px-md-5\">\n\t\t\t\t\t\t<div class=\"card-body \">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<a href=\"https:\/\/www.microsoft.com\/en-us\/research\/group\/m365-research\/\" class=\"icon-link icon-link--reverse mb-2\" data-bi-cN=\"M365 Research\">\n\t\t\t\t\t\t\t\t\t<span class=\"c-glyph glyph-chevron-left\" aria-hidden=\"true\"><\/span>\n\t\t\t\t\t\t\t\t\tM365 Research\t\t\t\t\t\t\t\t<\/a>\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\n<h1 class=\"wp-block-heading\" id=\"efficient-ai\">Efficient AI<\/h1>\n\n\n\n<p>Reimagining AI efficiency from GPU kernels to context engineering to power Copilot-scale intelligence.<\/p>\n\n\t\t\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t<\/div>\n\t\t<\/div>\n\t<\/div>\n<\/section>\n\n\n\n\n\n<div style=\"padding-bottom:32px; padding-top:32px\" class=\"wp-block-msr-immersive-section alignfull row wp-block-msr-immersive-section\">\n\t\n\t<div class=\"container\">\n\t\t<div class=\"wp-block-msr-immersive-section__wrapper\">\n\t\t\t<div class=\"wp-block-columns is-layout-flex wp-container-core-columns-is-layout-9d6595d7 wp-block-columns-is-layout-flex\">\n<div class=\"wp-block-column is-vertically-aligned-top is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:20%\"><div class=\"heading-wrapper\">\n<h2 class=\"wp-block-heading is-style-spectrum-fill\" id=\"our-mission\">Our mission<\/h2>\n<\/div><\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:66.66%\">\n<p>We work to advance efficiency across AI systems by exploring novel designs and optimizations across the full AI stack: models, system design decisions, cloud infrastructure, and hardware. Our goal is to develop methods and systems that radically improve the cost, latency, and reliability of large-scale AI. 
We take an end-to-end approach, from GPU kernels to scheduling and batching policies to context and memory management, unlocking multiplicative gains rather than incremental improvements. By pushing the boundaries of efficiency, we enable AI that is faster, more sustainable, and ready to scale.<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"456\" src=\"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/10\/efficient-ai-research-1024x456.png\" alt=\"diagram\" class=\"wp-image-1152870\" srcset=\"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/10\/efficient-ai-research-1024x456.png 1024w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/10\/efficient-ai-research-300x134.png 300w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/10\/efficient-ai-research-768x342.png 768w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/10\/efficient-ai-research-1536x684.png 1536w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/10\/efficient-ai-research-2048x912.png 2048w, https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/10\/efficient-ai-research-240x107.png 240w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n<\/div>\n<\/div>\n\n\n\n<p><\/p>\t\t<\/div>\n\t<\/div>\n\n\t<\/div>\n\n\n\n<div style=\"padding-bottom:32px; padding-top:32px\" class=\"wp-block-msr-immersive-section alignfull row has-background has-lighter-gray-background-color has-text-color has-black-color wp-block-msr-immersive-section\">\n\t\n\t<div class=\"container\">\n\t\t<div class=\"wp-block-msr-immersive-section__wrapper\">\n\t\t\t<div class=\"wp-block-columns is-layout-flex wp-container-core-columns-is-layout-9d6595d7 wp-block-columns-is-layout-flex\">\n<div class=\"wp-block-column is-vertically-aligned-top is-layout-flow wp-block-column-is-layout-flow\" 
style=\"flex-basis:20%\"><div class=\"heading-wrapper\">\n<h2 class=\"wp-block-heading is-style-spectrum-fill\" id=\"our-research\">Our research<\/h2>\n<\/div><\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\">\n<div class=\"wp-block-columns has-white-background-color has-background is-layout-flex wp-container-core-columns-is-layout-9d6595d7 wp-block-columns-is-layout-flex\" style=\"box-shadow:var(--wp--preset--shadow--natural)\">\n<div class=\"wp-block-column is-vertically-aligned-center is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:25%\">\n<p class=\"has-blue-color has-text-color has-link-color wp-elements-736aa35d084788248b9c1923fa74fabd\"><strong><a href=\"https:\/\/www.microsoft.com\/en-us\/research\/?p=1150284&post_type=msr-project\" rel=\"nofollow\">Kernel\u2011level innovation and hardware\u2011aware modeling<\/a><\/strong><\/p>\n<\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:66.66%\">\n<p>We design and&nbsp;optimize&nbsp;GPU kernels and model\u2011execution strategies to maximize throughput and minimize latency for real\u2011world LLM workloads.<\/p>\n<\/div>\n<\/div>\n\n\n\n<div class=\"wp-block-columns has-white-background-color has-background is-layout-flex wp-container-core-columns-is-layout-9d6595d7 wp-block-columns-is-layout-flex\" style=\"box-shadow:var(--wp--preset--shadow--natural)\">\n<div class=\"wp-block-column is-vertically-aligned-center is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:25%\">\n<p class=\"has-blue-color has-text-color has-link-color wp-elements-ea571fa8fa4e32bc0e5a92807dd1c156\"><strong><a href=\"https:\/\/www.microsoft.com\/en-us\/research\/?p=1150288&post_type=msr-project\" rel=\"nofollow\">System\u2011level innovation for inference at scale<\/a><\/strong><\/p>\n<\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" 
style=\"flex-basis:66.66%\">\n<p>We reimagine the AI inference stack, optimizing scheduling, routing, and resource allocation to deliver predictable performance and cost efficiency.<\/p>\n<\/div>\n<\/div>\n\n\n\n<div class=\"wp-block-columns has-white-background-color has-background is-layout-flex wp-container-core-columns-is-layout-9d6595d7 wp-block-columns-is-layout-flex\" style=\"box-shadow:var(--wp--preset--shadow--natural)\">\n<div class=\"wp-block-column is-vertically-aligned-center is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:25%\">\n<p class=\"has-blue-color has-text-color has-link-color wp-elements-62be00e16216b1efbe8d1d0b9f1b120f\"><strong><a href=\"https:\/\/www.microsoft.com\/en-us\/research\/?p=1150291&post_type=msr-project\" rel=\"nofollow\">Context engineering and agents<\/a><\/strong><\/p>\n<\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:66.66%\">\n<p>Long\u2011horizon assistants, reasoning\u2011heavy models, and agentic workflows drive significant inference\u2011time compute and context growth. 
We make AI smarter and leaner by engineering &#8220;context paths&#8221; that minimize redundancy while preserving utility.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\t\t<\/div>\n\t<\/div>\n\n\t<\/div>\n\n\n\n<div style=\"padding-bottom:32px; padding-top:32px\" class=\"wp-block-msr-immersive-section alignfull row wp-block-msr-immersive-section\">\n\t\n\t<div class=\"container\">\n\t\t<div class=\"wp-block-msr-immersive-section__wrapper\">\n\t\t\t<div class=\"heading-wrapper\">\n<h2 class=\"wp-block-heading is-style-spectrum-fill\" id=\"news\">News<\/h2>\n<\/div>\t\t<\/div>\n\t<\/div>\n\n\t<\/div>\n\n\n\n<div style=\"padding-bottom:32px; padding-top:32px\" class=\"wp-block-msr-immersive-section alignfull row has-background has-lighter-gray-background-color has-text-color has-black-color wp-block-msr-immersive-section\">\n\t\n\t<div class=\"container\">\n\t\t<div class=\"wp-block-msr-immersive-section__wrapper\">\n\t\t\t<div class=\"wp-block-columns is-layout-flex wp-container-core-columns-is-layout-9d6595d7 wp-block-columns-is-layout-flex\">\n<div class=\"wp-block-column is-vertically-aligned-center is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:20%\"><div class=\"heading-wrapper\">\n<h2 class=\"wp-block-heading is-style-spectrum-fill\" id=\"work-with-us\">Work with us<\/h2>\n<\/div><\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\"><\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\"><\/div>\n\n\n\n<div class=\"wp-block-column is-style-default is-layout-flow wp-block-column-is-layout-flow\"><\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:75%\">\n<div class=\"wp-block-group is-layout-constrained wp-block-group-is-layout-constrained\">\n<div class=\"wp-block-group is-layout-constrained 
wp-block-group-is-layout-constrained\"><\/div>\n<\/div>\n<\/div>\n<\/div>\t\t<\/div>\n\t<\/div>\n\n\t<\/div>\n\n\n","protected":false},"excerpt":{"rendered":"<p>Reimagining AI efficiency from GPU kernels to context engineering to power Copilot-scale intelligence.<\/p>\n","protected":false},"featured_media":1054521,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr_group_start":"","footnotes":""},"research-area":[13556,13547],"msr-group-type":[243694],"msr-locale":[268875],"msr-impact-theme":[264846,261667],"class_list":["post-1145968","msr-group","type-msr-group","status-publish","has-post-thumbnail","hentry","msr-research-area-artificial-intelligence","msr-research-area-systems-and-networking","msr-group-type-group","msr-locale-en_us"],"msr_group_start":"","msr_detailed_description":"","msr_further_details":"","msr_hero_images":[],"msr_research_lab":[],"related-researchers":[{"type":"user_nicename","display_name":"Saravan Rajmohan","user_id":41039,"people_section":"Leadership and Strategic Steering","alias":"saravar"},{"type":"guest","display_name":"Jim Kleewein","user_id":786892,"people_section":"Leadership and Strategic Steering","alias":""},{"type":"user_nicename","display_name":"Victor Ruehle","user_id":41027,"people_section":"Research Manager","alias":"virueh"},{"type":"user_nicename","display_name":"Srikant Bharadwaj","user_id":41644,"people_section":"Team","alias":"srbharadwaj"},{"type":"user_nicename","display_name":"Camille Couturier","user_id":40111,"people_section":"Team","alias":"cacoutur"},{"type":"user_nicename","display_name":"Dongge Han","user_id":43392,"people_section":"Team","alias":"donggehan"},{"type":"user_nicename","display_name":"Helia Hashemi","user_id":44000,"people_section":"Team","alias":"heliahashemi"},{"type":"user_nicename","display_name":"Mirian Hipolito 
Garcia","user_id":40483,"people_section":"Team","alias":"mirianh"},{"type":"user_nicename","display_name":"Samuel Kessler","user_id":43566,"people_section":"Team","alias":"t-skessler"},{"type":"user_nicename","display_name":"Daniel Eduardo Madrigal Diaz","user_id":40480,"people_section":"Team","alias":"danielmad"},{"type":"user_nicename","display_name":"Ankur Mallick","user_id":42441,"people_section":"Team","alias":"ankurmallick"},{"type":"user_nicename","display_name":"Spyridon (Spyros) Mastorakis","user_id":43994,"people_section":"Team","alias":"smastorakis"},{"type":"user_nicename","display_name":"Anjaly Parayil","user_id":41215,"people_section":"Team","alias":"aparayil"},{"type":"user_nicename","display_name":"Renee St. Amant","user_id":43080,"people_section":"Team","alias":"reneestamant"},{"type":"user_nicename","display_name":"Molly Xia","user_id":41943,"people_section":"Team","alias":"mollyxia"},{"type":"user_nicename","display_name":"Fangkai Yang","user_id":41425,"people_section":"Team","alias":"fangkaiyang"},{"type":"user_nicename","display_name":"Jue Zhang","user_id":41212,"people_section":"Team","alias":"juezhang"},{"type":"user_nicename","display_name":"Qingwei Lin \u6797\u5e86\u7ef4","user_id":33318,"people_section":"Team","alias":"qlin"}],"related-publications":[1146313,1152925,1152863,1152860,1152856,1152852,1152844,1151270,1149294,1148074,1148072,1148067,1146318,860460,1140724,1136484,1135014,1135010,1129848,1129749,1041231,1031946,1031928,1018440,1016619,879069],"related-downloads":[],"related-videos":[],"related-projects":[1155944],"related-events":[],"related-opportunities":[],"related-posts":[],"tab-content":[],"msr_impact_theme":["Computing 
foundations","Empowerment"],"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-group\/1145968","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-group"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-group"}],"version-history":[{"count":130,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-group\/1145968\/revisions"}],"predecessor-version":[{"id":1157648,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-group\/1145968\/revisions\/1157648"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media\/1054521"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=1145968"}],"wp:term":[{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=1145968"},{"taxonomy":"msr-group-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-group-type?post=1145968"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=1145968"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=1145968"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}