{"id":596461,"date":"2019-07-04T15:51:21","date_gmt":"2019-07-04T22:51:21","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-research-item&#038;p=596461"},"modified":"2020-12-18T13:12:09","modified_gmt":"2020-12-18T21:12:09","slug":"staying-up-to-date-with-online-content-changes-using-reinforcement-learning-for-scheduling","status":"publish","type":"msr-research-item","link":"https:\/\/www.microsoft.com\/en-us\/research\/publication\/staying-up-to-date-with-online-content-changes-using-reinforcement-learning-for-scheduling\/","title":{"rendered":"Staying up to Date with Online Content Changes Using Reinforcement Learning for Scheduling"},"content":{"rendered":"<p>From traditional Web search engines to virtual assistants and Web accelerators, services that rely on online information need to continually keep track of remote content changes by explicitly requesting content updates from remote sources (e.g., web pages). We propose a novel optimization objective for this setting that has several practically desirable properties, and ef\ufb01cient algorithms for it with optimality guarantees even in the face of mixed content change observability and initially unknown change model parameters. Experiments on 18.5M URLs crawled daily for 14 weeks show signi\ufb01cant advantages of this approach over prior art.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>From traditional Web search engines to virtual assistants and Web accelerators, services that rely on online information need to continually keep track of remote content changes by explicitly requesting content updates from remote sources (e.g., web pages). We propose a novel optimization objective for this setting that has several practically desirable properties, and ef\ufb01cient algorithms [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"","msr_publisher_other":"","msr_booktitle":"","msr_chapter":"","msr_edition":"","msr_editors":"","msr_how_published":"","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"","msr_pages_string":"","msr_page_range_start":"","msr_page_range_end":"","msr_series":"","msr_volume":"","msr_copyright":"","msr_conference_name":"NeurIPS 2019","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"","msr_pubmed_id":"","msr_other_authors":"","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":"","msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2019-10-30","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":0,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":true,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":0,"footnotes":""},"msr-research-highlight":[],"research-area":[13556],"msr-publication-type":[193716],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[],"msr-field-of-study":[],"msr-conference":[],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-596461","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-artificial-intelligence","msr-locale-en_us"],"msr_publishername":"","msr_edition":"","msr_affiliation":"","msr_published_date":"2019-10-30","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":1,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"file","viewUrl":"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2019\/07\/NeurIPS_2019_CAMERA_READY_WITH_SUPPLEMENT_Staying_Up_to_Date_with_Online_Content_Changes_Using_Reinforcement_Learning_for_Scheduling.pdf","id":"618864","title":"neurips_2019_camera_ready_with_supplement_staying_up_to_date_with_online_content_changes_using_reinforcement_learning_for_scheduling-2","label_id":"243109","label":0}],"msr_related_uploader":"","msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[{"id":618864,"url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2019\/10\/NeurIPS_2019_CAMERA_READY_WITH_SUPPLEMENT_Staying_Up_to_Date_with_Online_Content_Changes_Using_Reinforcement_Learning_for_Scheduling.pdf"},{"id":618861,"url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2019\/10\/NeurIPS_2019_CAMERA_READY_WITH_SUPPLEMENT_Staying_Up_to_Date_with_Online_Content_Changes_Using_Reinforcement_Learning_for_Scheduling.pdf"},{"id":596464,"url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2019\/07\/Staying-up-to-date-ICML-2019.pdf"}],"msr-author-ordering":[{"type":"user_nicename","value":"Andrey Kolobov","user_id":30910,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Andrey Kolobov"},{"type":"user_nicename","value":"Yuval Peres","user_id":33234,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Yuval Peres"},{"type":"text","value":"Cheng Lu","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Eric Horvitz","user_id":32033,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Eric Horvitz"}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[],"msr_project":[642246],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"inproceedings","related_content":{"projects":[{"ID":642246,"post_title":"Web crawl scheduling","post_name":"web-crawl-scheduling","post_type":"msr-project","post_date":"2020-03-10 22:58:14","post_modified":"2020-03-20 01:18:20","post_status":"publish","permalink":"https:\/\/www.microsoft.com\/en-us\/research\/project\/web-crawl-scheduling\/","post_excerpt":"A web crawler is an essential part of a search engine that visits URLs and downloads their content, which is subsequently processed, indexed, and used by the search engine to decide which results to show the users. As the Web is becoming increasingly dynamic, in addition to discovering new web pages a crawler needs to keep revisiting those already in the search engine\u2019s index, in order to keep the index fresh by picking up the&hellip;","_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/642246"}]}}]},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/596461","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":1,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/596461\/revisions"}],"predecessor-version":[{"id":596467,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/596461\/revisions\/596467"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=596461"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=596461"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=596461"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=596461"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=596461"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=596461"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=596461"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=596461"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=596461"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=596461"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=596461"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=596461"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=596461"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}