arXiv: 2410.03001
Can Transformers Learn $n$-gram Language Models?
3 October 2024
Anej Svete
Nadav Borenstein
M. Zhou
Isabelle Augenstein
Ryan Cotterell
Papers citing
"Can Transformers Learn $n$-gram Language Models?"
2 papers:

Bigram Subnetworks: Mapping to Next Tokens in Transformer Language Models
Tyler A. Chang, Benjamin Bergen
21 Apr 2025

Better Estimation of the KL Divergence Between Language Models
Afra Amini, Tim Vieira, Ryan Cotterell
14 Apr 2025