7:["$","main",null,{"className":"container mx-auto p-4 py-8 sm:px-6 lg:px-8","data-sentry-component":"PapersPage","data-sentry-source-file":"page.tsx","children":[["$","nav",null,{"className":"hidden sm:flex mb-4","aria-label":"Breadcrumb","data-sentry-component":"Breadcrumb","data-sentry-source-file":"Breadcrumb.tsx","children":["$","ol",null,{"role":"list","className":"flex items-center space-x-2","children":[["$","li","0",{"children":["$","div",null,{"className":"flex items-center space-x-2","children":[false,["$","$L1a",null,{"href":"/","prefetch":false,"aria-current":"$undefined","className":"text-sm font-medium hover:underline text-gray-500 dark:text-neutral-400 hover:text-gray-700 dark:hover:text-neutral-300","children":"Home"}]]}]}],["$","li","1",{"children":["$","div",null,{"className":"flex items-center space-x-2","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","viewBox":"0 0 20 20","fill":"currentColor","aria-hidden":"true","data-slot":"icon","ref":"$undefined","aria-labelledby":"$undefined","className":"size-5 shrink-0 text-gray-400 dark:text-neutral-500","children":[null,["$","path",null,{"fillRule":"evenodd","d":"M8.22 5.22a.75.75 0 0 1 1.06 0l4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.75.75 0 0 1-1.06-1.06L11.94 10 8.22 6.28a.75.75 0 0 1 0-1.06Z","clipRule":"evenodd"}]]}],["$","$L1a",null,{"href":"/papers","prefetch":false,"aria-current":"$undefined","className":"text-sm font-medium hover:underline text-gray-500 dark:text-neutral-400 hover:text-gray-700 dark:hover:text-neutral-300","children":"Papers"}]]}]}],["$","li","2",{"children":["$","div",null,{"className":"flex items-center space-x-2","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","viewBox":"0 0 20 20","fill":"currentColor","aria-hidden":"true","data-slot":"icon","ref":"$undefined","aria-labelledby":"$undefined","className":"size-5 shrink-0 text-gray-400 dark:text-neutral-500","children":[null,["$","path",null,{"fillRule":"evenodd","d":"M8.22 5.22a.75.75 0 0 
1 1.06 0l4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.75.75 0 0 1-1.06-1.06L11.94 10 8.22 6.28a.75.75 0 0 1 0-1.06Z","clipRule":"evenodd"}]]}],["$","$L1a",null,{"href":"#","prefetch":false,"aria-current":"$undefined","className":"text-sm font-medium hover:underline text-gray-500 dark:text-neutral-400 hover:text-gray-700 dark:hover:text-neutral-300","children":"2601.04411"}]]}]}],["$","li","3",{"children":["$","div",null,{"className":"flex items-center space-x-2","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","viewBox":"0 0 20 20","fill":"currentColor","aria-hidden":"true","data-slot":"icon","ref":"$undefined","aria-labelledby":"$undefined","className":"size-5 shrink-0 text-gray-400 dark:text-neutral-500","children":[null,["$","path",null,{"fillRule":"evenodd","d":"M8.22 5.22a.75.75 0 0 1 1.06 0l4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.75.75 0 0 1-1.06-1.06L11.94 10 8.22 6.28a.75.75 0 0 1 0-1.06Z","clipRule":"evenodd"}]]}],["$","$L1a",null,{"href":"/papers/2601.04411/cited-by","prefetch":false,"aria-current":"page","className":"text-sm font-medium hover:underline text-primary-900 dark:text-primary-300 hover:text-primary-950 dark:hover:text-primary-200","children":"Cited By"}]]}]}]]}]}],["$","div",null,{"className":"mb-12 flex items-start gap-4 md:gap-6 lg:items-center lg:gap-8","children":[["$","$L19",null,{"src":"https://cdn.parameterlab.de/papers/2601.04411/pages/page.jpg","alt":"Rate or Fate? RLV$^\\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards","className":"my-6 aspect-paper w-24 grow-0 shadow-md md:w-32 lg:w-48","width":128,"height":128}],["$","section",null,{"className":"space-y-1 md:space-y-2","data-sentry-component":"Overview","data-sentry-source-file":"Overview.tsx","children":[null,["$","h1",null,{"className":"text-lg font-black leading-tight text-primary-600 dark:text-primary-200 md:text-xl lg:text-2xl xl:text-3xl","children":[["Rate or Fate? 
RLV",["$","span","0",{"data-testid":"react-katex","dangerouslySetInnerHTML":{"__html":"

^\\varepsilon

"}}],"R: Reinforcement Learning with Verifiable Noisy Rewards"],["$","$L21",null,{"tip":"Technical paper","className":"ml-2 inline-block align-middle","children":["$","$L22",null,{"icon":"fluent-emoji:test-tube","className":"size-5"}]}]]}],null,["$","$L21",null,{"tip":"Publication date","className":"w-fit items-start","data-sentry-element":"ToolTip","data-sentry-source-file":"Overview.tsx","children":["$","time",null,{"className":"text-gray-500 dark:text-gray-400","dateTime":"2026-01-07T00:00:00.000Z","children":"7 January 2026"}]}],["$","$L23",null,{"paper":{"id":"2601.04411","title":"Rate or Fate? RLV$^\\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards","abstract":"$24","thumbnail":"https://cdn.parameterlab.de/papers/2601.04411/pages/page.jpg","publishedAt":"2026-01-07T00:00:00.000Z","authors":["Ali Rad","Khashayar Filom","Darioush Keivan","Peyman Mohajerin Esfahani","Ehsan Kamalinejad"],"authorEntities":[],"emails":["ali@cognichip.ai","","","",""],"conferenceInfo":null,"citationCount":0,"likeCount":0,"updatedAt":"2026-01-07T00:00:00.000Z","crawledAt":"2026-01-09T03:01:59.484Z","includingImages":[],"pageImages":["https://cdn.parameterlab.de/papers/2601.04411/pages/page.jpg"],"summary":null,"communities":[],"viewCount":"17","bibtex":"@article{rad2026_2601.04411,\n title={ Rate or Fate? RLV$^\\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards },\n author={ Ali Rad and Khashayar Filom and Darioush Keivan and Peyman Mohajerin Esfahani and Ehsan Kamalinejad },\n journal={arXiv preprint arXiv:2601.04411},\n year={ 2026 }\n}","youtubeIds":[],"connectYoutube":null,"githubUrl":"https://github.com/cognichip/Noisy-RL","githubStars":1,"versions":[{"version":"v1","title":"Rate or Fate? 
RLV$^\\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards","abstract":"$25","authors":["Ali Rad","Khashayar Filom","Darioush Keivan","Peyman Mohajerin Esfahani","Ehsan Kamalinejad"],"publishedAt":"2026-01-07T00:00:00.000Z"}],"currentVersion":"v1","paperType":"others","paperStats":{"mainPages":18,"bibliographyPages":4,"appendixPages":40,"figures":9,"tables":5},"huggingFaceUpvotes":null},"disableLink":"$undefined","data-sentry-element":"AuthorNames","data-sentry-source-file":"Overview.tsx"}],["$","ul",null,{"className":"flex flex-wrap gap-1 lg:gap-2","children":[[],["$","$L26",null,{"paperId":"2601.04411"}]]}],["$","div",null,{"data-tour":"paper-external-links","className":"inline-flex gap-1 items-center flex-wrap","children":[["$","$L1a",null,{"href":"http://arxiv.org/abs/2601.04411","target":"_blank","rel":"noopener noreferrer","className":"flex items-center gap-1 text-xs font-medium px-2 py-1 rounded-md bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-200 transition-colors","aria-label":"ArXiv page","children":[["$","span",null,{"children":"ArXiv (abs)"}],["$","svg",null,{"ref":"$undefined","xmlns":"http://www.w3.org/2000/svg","width":12,"height":12,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-external-link","children":[["$","path","1q9fwt",{"d":"M15 3h6v6"}],["$","path","gplh6r",{"d":"M10 14 21 3"}],["$","path","a6xqqp",{"d":"M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"}],"$undefined"]}]]}],["$","$L1a",null,{"href":"https://arxiv.org/pdf/2601.04411","target":"_blank","rel":"noopener noreferrer","className":"flex items-center gap-1 text-xs font-medium px-2 py-1 rounded-md bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-200 transition-colors","aria-label":"PDF 
download","children":[["$","span",null,{"children":"PDF"}],["$","svg",null,{"ref":"$undefined","xmlns":"http://www.w3.org/2000/svg","width":12,"height":12,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-external-link","children":[["$","path","1q9fwt",{"d":"M15 3h6v6"}],["$","path","gplh6r",{"d":"M10 14 21 3"}],["$","path","a6xqqp",{"d":"M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"}],"$undefined"]}]]}],["$","$L1a",null,{"href":"https://arxiv.org/html/2601.04411v1","target":"_blank","rel":"noopener noreferrer","className":"flex items-center gap-1 text-xs font-medium px-2 py-1 rounded-md bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-200 transition-colors","aria-label":"HTML version","children":[["$","span",null,{"children":"HTML"}],["$","svg",null,{"ref":"$undefined","xmlns":"http://www.w3.org/2000/svg","width":12,"height":12,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-external-link","children":[["$","path","1q9fwt",{"d":"M15 3h6v6"}],["$","path","gplh6r",{"d":"M10 14 21 3"}],["$","path","a6xqqp",{"d":"M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"}],"$undefined"]}]]}],false,["$","$L1a",null,{"href":"https://github.com/cognichip/Noisy-RL","target":"_blank","rel":"noopener noreferrer","className":"flex items-center gap-1 text-xs font-medium px-2 py-1 rounded-md bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-200 transition-colors","aria-label":"GitHub repository","children":[["$","svg",null,{"ref":"$undefined","xmlns":"http://www.w3.org/2000/svg","width":12,"height":12,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide 
lucide-github","children":[["$","path","tonef",{"d":"M15 22v-4a4.8 4.8 0 0 0-1-3.5c3 0 6-2 6-5.5.08-1.25-.27-2.48-1-3.5.28-1.15.28-2.35 0-3.5 0 0-1 0-3 1.5-2.64-.5-5.36-.5-8 0C6 2 5 2 5 2c-.3 1.15-.3 2.35 0 3.5A5.403 5.403 0 0 0 4 9c0 3.5 3 5.5 6 5.5-.39.49-.68 1.05-.85 1.65-.17.6-.22 1.23-.15 1.85v4"}],["$","path","9comsn",{"d":"M9 18c-4.51 2-5-2-7-2"}],"$undefined"]}],["$","span",null,{"children":["Github"," (1★)"]}],["$","svg",null,{"ref":"$undefined","xmlns":"http://www.w3.org/2000/svg","width":12,"height":12,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-external-link","children":[["$","path","1q9fwt",{"d":"M15 3h6v6"}],["$","path","gplh6r",{"d":"M10 14 21 3"}],["$","path","a6xqqp",{"d":"M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"}],"$undefined"]}]]}],[],null]}]]}]]}],["$","h1",null,{"className":"mb-4 text-xl font-medium text-primary-600 dark:text-primary-400","children":["Papers citing"," ",["$","span",null,{"className":"font-semibold italic text-primary-900 dark:text-primary-300","children":["\"","Rate or Fate? RLV$^\\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards","\""]}]]}],["$","$L1b",null,{"className":"w-full","papers":[],"pagination":{"page":1,"size":0,"totalPages":0,"total":0},"error":"$undefined","data-sentry-element":"PaperTable","data-sentry-source-file":"page.tsx"}],["$","$L27",null,{"currentPage":1,"totalPages":0}]]}]

Rate or Fate? RLV$^\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards

Papers citing "Rate or Fate? RLV$^\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards"

Rate or Fate? RLV$^\varepsilon$R: Reinforcement Learning with Verifiable Noisy Rewards