When you're scaling a peak, reliability tends to be a big deal!

1.0Microsoft Researchhttps://www.microsoft.com/en-us/researchLaura LoPrestihttps://www.microsoft.com/en-us/research/people/v-lalopr/When you're scaling a peak, reliability tends to be a big deal!rich600338<blockquote class="wp-embedded-content" data-secret="gcc09y7Hjj"><a href="https://www.microsoft.com/en-us/research/blog/reliability-in-reinforcement-learning/">Reliability in Reinforcement Learning</a></blockquote><iframe sandbox="allow-scripts" security="restricted" src="https://www.microsoft.com/en-us/research/blog/reliability-in-reinforcement-learning/embed/#?secret=gcc09y7Hjj" width="600" height="338" title="“Reliability in Reinforcement Learning” — Microsoft Research" data-secret="gcc09y7Hjj" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" class="wp-embedded-content"></iframe><script> /*! This file is auto-generated */ !function(d,l){"use strict";l.querySelector&&d.addEventListener&&"undefined"!=typeof URL&&(d.wp=d.wp||{},d.wp.receiveEmbedMessage||(d.wp.receiveEmbedMessage=function(e){var t=e.data;if((t||t.secret||t.message||t.value)&&!/[^a-zA-Z0-9]/.test(t.secret)){for(var s,r,n,a=l.querySelectorAll('iframe[data-secret="'+t.secret+'"]'),o=l.querySelectorAll('blockquote[data-secret="'+t.secret+'"]'),c=new RegExp("^https?:$","i"),i=0;i<o.length;i++)o[i].style.display="none";for(i=0;i<a.length;i++)s=a[i],e.source===s.contentWindow&&(s.removeAttribute("style"),"height"===t.message?(1e3<(r=parseInt(t.value,10))?r=1e3:~~r<200&&(r=200),s.height=r):"link"===t.message&&(r=new URL(s.getAttribute("src")),n=new URL(t.value),c.test(n.protocol))&&n.host===r.host&&l.activeElement===s&&(d.top.location.href=t.value))}},d.addEventListener("message",d.wp.receiveEmbedMessage,!1),l.addEventListener("DOMContentLoaded",function(){for(var e,t,s=l.querySelectorAll("iframe.wp-embedded-content"),r=0;r<s.length;r++)(t=(e=s[r]).getAttribute("data-secret"))||(t=Math.random().toString(36).substring(2,12),e.src+="#?secret="+t,e.setAttribute("data-secret",t)),e.contentWindow.postMessage({message:"ready",secret:t},"*")},!1)))}(window,document); //# sourceURL=https://www.microsoft.com/en-us/research/wp-includes/js/wp-embed.min.js </script> https://www.microsoft.com/en-us/research/wp-content/uploads/2019/06/Reliability-In-Reinforcement-Learning_Social_06_2019_1200x627.png1200627Deep reinforcement learning algorithms are impressive, but only when they work. In reality, they are largely unreliable and can yield very different results. Romain Laroche proposes two ways to achieve reliability in RL.