<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/CINECAstyle.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-06-14T07:47:25Z</responseDate><request verb="GetRecord" identifier="oai:iris.cnr.it:20.500.14243/570521" metadataPrefix="oai_dc">https://iris.cnr.it/oai/request</request><GetRecord><record><header><identifier>oai:iris.cnr.it:20.500.14243/570521</identifier><datestamp>2026-03-04T01:27:43Z</datestamp><setSpec>com_20.500.14243_46</setSpec><setSpec>com_20.500.14243_21</setSpec><setSpec>col_20.500.14243_47</setSpec><setSpec>ou_ou239</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>TEXT-CAKE: Challenging Language Models on Local Text Coherence</dc:title>
<dc:creator>Dini L.</dc:creator>
<dc:creator>Brunato D.</dc:creator>
<dc:creator>Dell'Orletta F.</dc:creator>
<dc:creator>Caselli T.</dc:creator>
<dc:contributor>Dini, L.</dc:contributor>
<dc:contributor>Brunato, D.</dc:contributor>
<dc:contributor>Dell'Orletta, F.</dc:contributor>
<dc:contributor>Caselli, T.</dc:contributor>
<dc:subject>Large Language Models (LLMs)</dc:subject>
<dc:subject>Text Coherence</dc:subject>
<dc:description>We present a deep investigation of encoder-based Language Models (LMs) on their abilities to detect text coherence across four languages and four text genres using a new evaluation benchmark, TEXT-CAKE. We analyze both multilingual and monolingual LMs with varying architectures and parameters in different finetuning settings. Our findings demonstrate that identifying subtle perturbations that disrupt local coherence is still a challenging task. Furthermore, our results underline the importance of using diverse text genres during pre-training and of an optimal pre-training objective and large vocabulary size. When controlling for other parameters, deep LMs (i.e., higher number of layers) have an advantage over shallow ones, even when the total number of parameters is smaller.</dc:description>
<dc:date>2025</dc:date>
<dc:type>info:eu-repo/semantics/conferenceObject</dc:type>
<dc:identifier>https://hdl.handle.net/20.500.14243/570521</dc:identifier>
<dc:identifier>info:eu-repo/semantics/altIdentifier/scopus/2-s2.0-85218500743</dc:identifier>
<dc:language>eng</dc:language>
<dc:relation>ispartofbook:Proceedings - International Conference on Computational Linguistics, COLING</dc:relation>
<dc:relation>31st International Conference on Computational Linguistics, COLING 2025</dc:relation>
<dc:relation>firstpage:4384</dc:relation>
<dc:relation>lastpage:4398</dc:relation>
<dc:relation>numberofpages:15</dc:relation>
<dc:relation>serie:INTERNATIONAL CONFERENCE ON COMPUTATIONAL LINGUISTICS</dc:relation>
<dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
<dc:publisher>Association for Computational Linguistics (ACL)</dc:publisher>
<dc:rights>license:Creative commons</dc:rights>
<dc:rights>license uri:http://creativecommons.org/licenses/by-nc-nd/4.0/</dc:rights>
</oai_dc:dc></metadata></record></GetRecord></OAI-PMH>