{"id":1115526,"date":"2025-01-02T15:27:56","date_gmt":"2025-01-02T23:27:56","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-research-item&#038;p=1115526"},"modified":"2025-01-02T15:27:57","modified_gmt":"2025-01-02T23:27:57","slug":"lean-formalization-of-extended-regular-expression-matching-with-lookarounds","status":"publish","type":"msr-research-item","link":"https:\/\/www.microsoft.com\/en-us\/research\/publication\/lean-formalization-of-extended-regular-expression-matching-with-lookarounds\/","title":{"rendered":"Lean Formalization of Extended Regular Expression Matching with Lookarounds"},"content":{"rendered":"<p>We present a formalization of a matching algorithm for extended regular expression matching based on locations and symbolic derivatives which supports intersection, complement and lookarounds and whose implementation mirrors an extension of the recent .NET NonBacktracking regular expression engine. The formalization of the algorithm and its semantics uses the Lean 4 proof assistant. The proof of its correctness is with respect to standard matching semantics.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>We present a formalization of a matching algorithm for extended regular expression matching based on locations and symbolic derivatives which supports intersection, complement and lookarounds and whose implementation mirrors an extension of the recent .NET NonBacktracking regular expression engine. The formalization of the algorithm and its semantics uses the Lean 4 proof assistant. The proof [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"ACM","msr_publisher_other":"","msr_booktitle":"","msr_chapter":"","msr_edition":"","msr_editors":"","msr_how_published":"","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"ACM","msr_pages_string":"","msr_page_range_start":"118","msr_page_range_end":"131","msr_series":"","msr_volume":"","msr_copyright":"This work is licensed under a Creative Commons Attribution 4.0 International License.","msr_conference_name":"CPP","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"","msr_pubmed_id":"","msr_other_authors":"","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":"","msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2024-1","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"https:\/\/popl24.sigplan.org\/home\/CPP-2024","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":0,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":true,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":null,"footnotes":""},"msr-research-highlight":[],"research-area":[13560],"msr-publication-type":[193716],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[269142],"msr-field-of-study":[246904,253207],"msr-conference":[],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-1115526","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-programming-languages-software-engineering","msr-locale-en_us","msr-post-option-include-in-river","msr-field-of-study-algorithm","msr-field-of-study-regular-expression"],"msr_publishername":"ACM","msr_edition":"","msr_affiliation":"","msr_published_date":"2024-1","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"ACM","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":1,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"file","viewUrl":"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/01\/cpp24-final.pdf","id":"1115529","title":"cpp24-final","label_id":"243103","label":0},{"type":"doi","viewUrl":"false","id":"false","title":"10.1145\/3636501.3636959","label_id":"243106","label":0}],"msr_related_uploader":[{"type":"file","viewUrl":"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/01\/cpp24-final.pdf","id":"1115529","title":"cpp24-final","label_id":"243112","label":0}],"msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[{"id":1115529,"url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2025\/01\/cpp24-final.pdf"}],"msr-author-ordering":[{"type":"text","value":"Ekaterina Zhuchko","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Margus Veanes","user_id":32806,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Margus Veanes"},{"type":"user_nicename","value":"Gabriel Ebner","user_id":42573,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Gabriel Ebner"}],"msr_impact_theme":[],"msr_research_lab":[199565],"msr_event":[],"msr_group":[144812],"msr_project":[259698],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"inproceedings","related_content":{"projects":[{"ID":259698,"post_title":"Automata","post_name":"automata","post_type":"msr-project","post_date":"2016-07-20 18:35:07","post_modified":"2018-12-04 17:02:22","post_status":"publish","permalink":"https:\/\/www.microsoft.com\/en-us\/research\/project\/automata\/","post_excerpt":"Automata is a .NET library that provides algorithms for composing and analyzing regular expressions, automata, and transducers. In addition to classical word automata, it also includes algorithms for analysis of tree automata and tree transducers. The library covers algorithms over finite alphabets as well as their symbolic counterparts. In symbolic automata concrete characters have been replaced by character predicates. Such predicates can range over very large or even infinite alphabets, like integers. Predicates can be&hellip;","_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/259698"}]}}]},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1115526","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":1,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1115526\/revisions"}],"predecessor-version":[{"id":1115532,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1115526\/revisions\/1115532"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=1115526"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=1115526"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=1115526"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=1115526"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=1115526"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=1115526"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=1115526"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=1115526"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=1115526"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=1115526"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=1115526"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=1115526"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=1115526"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}