{"id":1145331,"date":"2025-07-19T02:12:34","date_gmt":"2025-07-19T09:12:34","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-research-item&#038;p=1145331"},"modified":"2025-07-20T01:38:40","modified_gmt":"2025-07-20T08:38:40","slug":"execoder-empowering-large-language-models-with-executability-representation-for-code-translation","status":"publish","type":"msr-research-item","link":"https:\/\/www.microsoft.com\/en-us\/research\/publication\/execoder-empowering-large-language-models-with-executability-representation-for-code-translation\/","title":{"rendered":"ExeCoder: Empowering Large Language Models with Executability Representation for Code Translation"},"content":{"rendered":"<p>Code translation is a crucial activity in the software development and maintenance process, and researchers have recently begun to focus on using pre-trained large language models (LLMs) for code translation. However, existing LLMs only learn the contextual semantics of code during pre-training, neglecting executability information closely related to the execution state of the code, which results in unguaranteed code executability and unreliable automated code translation. To address this issue, we propose ExeCoder, an LLM specifically designed for code translation, aimed at utilizing executability representations such as functional semantics, syntax structures, and variable dependencies to enhance the capabilities of LLMs in code translation. To evaluate the effectiveness of ExeCoder, we manually enhanced the widely used benchmark TransCoder-test, resulting in a benchmark called TransCoder-test-X that serves LLMs. Evaluation of TransCoder-test-X indicates that ExeCoder achieves state-of-the-art performance in code translation, surpassing existing open-source code LLMs by over 10.88% to 38.78% and over 27.44% to 42.97% on two metrics, and even outperforms the renowned closed-source LLM GPT-4o. Website: https:\/\/execoder4trans.github.io\/<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Code translation is a crucial activity in the software development and maintenance process, and researchers have recently begun to focus on using pre-trained large language models (LLMs) for code translation. However, existing LLMs only learn the contextual semantics of code during pre-training, neglecting executability information closely related to the execution state of the code, which [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"","msr_publisher_other":"","msr_booktitle":"","msr_chapter":"","msr_edition":"","msr_editors":"","msr_how_published":"","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"","msr_pages_string":"","msr_page_range_start":"","msr_page_range_end":"","msr_series":"","msr_volume":"abs\/2501.18460","msr_copyright":"","msr_conference_name":"","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"","msr_pubmed_id":"","msr_other_authors":"","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":null,"msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2025-1-30","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":0,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":true,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":null,"footnotes":""},"msr-research-highlight":[],"research-area":[13556,13560],"msr-publication-type":[193715],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[269148,269142],"msr-field-of-study":[246694,246691,249202,251365],"msr-conference":[],"msr-journal":[268188],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-1145331","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-artificial-intelligence","msr-research-area-programming-languages-software-engineering","msr-locale-en_us","msr-post-option-approved-for-river","msr-post-option-include-in-river","msr-field-of-study-artificial-intelligence","msr-field-of-study-computer-science","msr-field-of-study-programming-language","msr-field-of-study-software-engineering"],"msr_publishername":"","msr_edition":"","msr_affiliation":"","msr_published_date":"2025-1-30","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"abs\/2501.18460","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":1,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"doi","viewUrl":"false","id":"false","title":"https:\/\/doi.org\/10.48550\/arXiv.2501.18460","label_id":"243106","label":0},{"type":"url","viewUrl":"false","id":"false","title":"https:\/\/dblp.org\/rec\/journals\/corr\/abs-2501-18460.html","label_id":"243109","label":0},{"type":"url","viewUrl":"false","id":"false","title":"https:\/\/arxiv.org\/abs\/2501.18460","label_id":"243109","label":0}],"msr_related_uploader":"","msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[],"msr-author-ordering":[{"type":"text","value":"Minghua He","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Fangkai Yang","user_id":41425,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Fangkai Yang"},{"type":"user_nicename","value":"Pu Zhao","user_id":38886,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Pu Zhao"},{"type":"text","value":"Wenjie Yin","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Yu Kang","user_id":39381,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Yu Kang"},{"type":"user_nicename","value":"Qingwei Lin \u6797\u5e86\u7ef4","user_id":33318,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Qingwei Lin \u6797\u5e86\u7ef4"},{"type":"user_nicename","value":"Saravan Rajmohan","user_id":41039,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Saravan Rajmohan"},{"type":"user_nicename","value":"Dongmei Zhang","user_id":31665,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Dongmei Zhang"},{"type":"text","value":"Qi Zhang","user_id":0,"rest_url":false}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[],"msr_project":[853323],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"article","related_content":{"projects":[{"ID":853323,"post_title":"Cloud System and Software Analytics","post_name":"cloud-system-and-software-analytics","post_type":"msr-project","post_date":"2022-06-24 00:55:15","post_modified":"2022-10-24 01:21:01","post_status":"publish","permalink":"https:\/\/www.microsoft.com\/en-us\/research\/project\/cloud-system-and-software-analytics\/","post_excerpt":"In Microsoft, we build and operate several world leading complex and large-scale productivity clouds (Azure, Microsoft 365). The quality of cloud platforms, including reliability, performance, efficiency, security, sustainability, etc., has become immensely important. The distributed nature, massive scale, and high complexity of cloud platforms present huge challenges to build and operate such systems effectively and efficiently. Each independent service in cloud computing, such as computing virtualization, cloud storage service, distributed database, etc., is a complex&hellip;","_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/853323"}]}}]},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1145331","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":1,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1145331\/revisions"}],"predecessor-version":[{"id":1145332,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1145331\/revisions\/1145332"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=1145331"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=1145331"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=1145331"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=1145331"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=1145331"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=1145331"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=1145331"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=1145331"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=1145331"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=1145331"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=1145331"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=1145331"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=1145331"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}