<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">NLP-UNED at eRisk 2024: Approximate Nearest Neighbors with Encoding Refinement for Early Detecting Signs of Anorexia</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Hermenegildo</forename><surname>Fabregat</surname></persName>
							<email>gildo.fabregat@lsi.uned.es</email>
							<affiliation key="aff0">
								<orgName type="department">Dpto. Lenguajes y Sistemas Informáticos</orgName>
								<orgName type="laboratory">NLP &amp; IR Group</orgName>
								<orgName type="institution">Universidad Nacional de Educación a Distancia (UNED)</orgName>
								<address>
									<addrLine>Juan del Rosal 16</addrLine>
									<postCode>28040</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="department">Avature Machine Learning</orgName>
								<address>
									<addrLine>Marqués de Valdeiglesias, 3</addrLine>
									<postCode>28004</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Daniel</forename><surname>Deniz</surname></persName>
							<email>daniel.deniz@avature.es</email>
							<affiliation key="aff2">
								<orgName type="department">Avature Machine Learning</orgName>
								<address>
									<addrLine>Marqués de Valdeiglesias, 3</addrLine>
									<postCode>28004</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Andres</forename><surname>Duque</surname></persName>
							<email>aduque@lsi.uned.es</email>
							<affiliation key="aff0">
								<orgName type="department">Dpto. Lenguajes y Sistemas Informáticos</orgName>
								<orgName type="laboratory">NLP &amp; IR Group</orgName>
								<orgName type="institution">Universidad Nacional de Educación a Distancia (UNED)</orgName>
								<address>
									<addrLine>Juan del Rosal 16</addrLine>
									<postCode>28040</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">IMIENS: Instituto Mixto de Investigación</orgName>
								<orgName type="institution">Escuela Nacional de Sanidad</orgName>
								<address>
									<addrLine>Monforte de Lemos 5</addrLine>
									<postCode>28019</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Lourdes</forename><surname>Araujo</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Dpto. Lenguajes y Sistemas Informáticos</orgName>
								<orgName type="laboratory">NLP &amp; IR Group</orgName>
								<orgName type="institution">Universidad Nacional de Educación a Distancia (UNED)</orgName>
								<address>
									<addrLine>Juan del Rosal 16</addrLine>
									<postCode>28040</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">IMIENS: Instituto Mixto de Investigación</orgName>
								<orgName type="institution">Escuela Nacional de Sanidad</orgName>
								<address>
									<addrLine>Monforte de Lemos 5</addrLine>
									<postCode>28019</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Juan</forename><surname>Martinez-Romo</surname></persName>
							<email>juaner@lsi.uned.es</email>
							<affiliation key="aff0">
								<orgName type="department">Dpto. Lenguajes y Sistemas Informáticos</orgName>
								<orgName type="laboratory">NLP &amp; IR Group</orgName>
								<orgName type="institution">Universidad Nacional de Educación a Distancia (UNED)</orgName>
								<address>
									<addrLine>Juan del Rosal 16</addrLine>
									<postCode>28040</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">IMIENS: Instituto Mixto de Investigación</orgName>
								<orgName type="institution">Escuela Nacional de Sanidad</orgName>
								<address>
									<addrLine>Monforte de Lemos 5</addrLine>
									<postCode>28019</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">NLP-UNED at eRisk 2024: Approximate Nearest Neighbors with Encoding Refinement for Early Detecting Signs of Anorexia</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">62BD0BE489D4DE23EAEDDDAD5FF449B0</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:55+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Early risk detection</term>
					<term>Anorexia</term>
					<term>Approximate Nearest Neighbors</term>
					<term>Contrastive Learning</term>
					<term>0000-0001-9820-2150 (H. Fabregat)</term>
					<term>0000-0002-0313-2127 (D. Deniz)</term>
					<term>0000-0002-0619-8615 (A. Duque)</term>
					<term>0000-0002-7657-4794 (L. Araujo)</term>
					<term>0000-0002-6905-7051 (J. Martinez-Romo)</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>This paper describes our participation in Task 2 (Early Detection of Signs of Anorexia) from the CLEF 2024 eRisk Workshop, addressed to detecting early signs of anorexia in Social Media users through the analysis of their posts. A relabelling step based on Approximate Nearest Neighbors (ANN) is performed for generating a training dataset annotated at message level instead of user level, and then contrastive learning techniques are applied for refining the previously generated vector representations of the messages. ANNs are used also for classification purposes, combined with the use of rules and heuristics focused on expanding the number of considered messages from the user for making the final decision. Our system obtains the best results in both the decision-based evaluation, with 9 percentage points over the second best system in terms of latency-weighted F1, and in the ranking-based evaluation, with the best scores for 11 out of the 12 metrics employed.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>In recent years, the analysis of social media for early detection of health risks has become an intriguing and significant area of research. Within this research field, the eRisk workshop, part of the Conference and Labs of the Evaluation Forum (CLEF) since 2017, has played a pivotal role. This workshop fosters collaborative efforts to develop innovative methodologies and practical solutions for the early identification of various health concerns, including eating disorders, self-harm, pathological gambling and depression, through the analysis of textual content on social media platforms. By analyzing social media posts and messages, researchers can obtain valuable insights to identify individuals at risk. This paper details our approach to tackling Task 2 of the eRisk 2024 Workshop <ref type="bibr" target="#b0">[1,</ref><ref type="bibr" target="#b1">2]</ref>: Early Detection of Signs of Anorexia. In this task, systems must sequentially process messages posted by different users in Reddit forums, searching for early traces of anorexia, this is, detecting as soon as possible whether a user is at risk of suffering from anorexia. The task is a continuation of Task 2 of the eRisk 2018 Workshop <ref type="bibr" target="#b2">[3]</ref> and Task 1 of the eRisk 2019 Workshop <ref type="bibr" target="#b3">[4]</ref>.</p><p>Building upon our previous work in the detection of pathological gambling <ref type="bibr" target="#b4">[5,</ref><ref type="bibr" target="#b5">6,</ref><ref type="bibr" target="#b6">7]</ref>, we have refined our system by incorporating contrastive learning techniques for fine-tuning the encoded representations of text messages written by the analyzed users. 
Additional heuristics have been also included in the system in order to expand the context of the user's messages, this way taking into account a larger number of previous messages when making the final decision on whether the user is at risk. These improvements have proven to enhance the system's accuracy and reliability in detecting potential cases of anorexia from social media content.</p><p>The rest of the paper is structured as follows: Section 2 gathers information about previous research works related to early detection of risks, as well as systems participating in previous eRisk competitions. A brief description of the addressed task, and the dataset and evaluation metrics involved is presented in Section 3. The different components of the proposed system are described in Section 4, and the results obtained by this system are shown and analyzed in Section 5. Finally, Section 6 depicts some conclusions about the work, together with possible future lines of work regarding this research.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related Work</head><p>The automatic detection of mental health issues is currently a hot research topic within machine learning, specifically regarding natural language processing. The availability of information sources with large amounts of data, such as social media, is enabling the development of new systems aimed at the early detection of these types of issues. Within this context, different evaluation frameworks and campaigns such as CLEF's eRisk <ref type="bibr" target="#b7">[8]</ref>, CLPsych <ref type="bibr" target="#b8">[9]</ref> or IberLEF's MentalRiskES <ref type="bibr" target="#b9">[10,</ref><ref type="bibr" target="#b10">11]</ref> represent a significant effort by the scientific community to support the development and dissemination of these types of systems.</p><p>Anorexia nervosa (AN) is a severe eating disorder characterized by an inability to maintain a healthy body weight, often falling below 85% of the ideal weight. Individuals with AN obsess over weight gain, perceive their bodies as larger than they are, and engage in behaviors to sustain weight loss. This illness profoundly affects both mind and body, with sufferers placing significant importance on their shape and weight, intertwining their self-esteem with their body image <ref type="bibr" target="#b11">[12]</ref>. The 2018 and 2019 CLEF eRisk competitions addressed the automatic detection of signs of anorexia in Social Media posts, encouraging the participating systems to develop techniques for determining whether a user can be classified as at risk of suffering from this illness. Although the stage of development of neural models was nowhere near the current level when the last edition of this task was held (2019), some of the best participating systems at that time used such models for their predictions. 
An ensemble approach with different neural attention-based models is used in <ref type="bibr" target="#b12">[13]</ref> for feature extraction, and then combined with Support Vector Machines to determine the final decision. Deep learning models are also used in <ref type="bibr" target="#b13">[14]</ref> for developing a time series dataset representing the evolution of the user's mood through time. Then, Bayesian inference is employed for performing the final classification. Other approaches obtained good results in the competition by using more classic machine learning methods such as statistical word-based techniques <ref type="bibr" target="#b14">[15]</ref>, or Support Vector Machines with customized feature sets based on emotions derived from the text <ref type="bibr" target="#b15">[16]</ref> or content-based features from phrases with personal pronouns <ref type="bibr" target="#b16">[17]</ref>. In general, and also based on the results obtained by our own participations in early risk detection tasks, systems not relying on deep learning techniques or large language models are also able to achieve good results <ref type="bibr" target="#b6">[7]</ref>.</p><p>Contrastive learning techniques can be defined as methods aimed to learn and refine effective representations of data by pulling semantically close neighbors together and pushing dissimilar ones apart <ref type="bibr" target="#b17">[18]</ref>. One of the most important characteristics of contrastive learning is that the model learns by comparison, this is, it is not necessary for the instances whose representations are to be refined to be accompanied by their corresponding labels. Instead, these approaches only need to define the similarity distribution. This way, the model should learn to map together similar instances, while separating dissimilar instances in the embedding space <ref type="bibr" target="#b18">[19]</ref>. 
These techniques have been successfully applied to computer vision problems <ref type="bibr" target="#b19">[20]</ref> and natural language processing tasks <ref type="bibr" target="#b20">[21]</ref>, as well as to other domains such as audio or reinforcement learning <ref type="bibr" target="#b21">[22]</ref>. Considering our system presented in previous eRisk competitions, based on approximate nearest neighbors with vector representations of text messages, exploring these techniques seems like a logical step for its improvement.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Task 2: Early Detection of Signs of Anorexia</head><p>As previously mentioned, we have participated in task 2 of the eRisk 2024 competition, denoted "Early Detection of Signs of Anorexia". In this task, participants have access to a training dataset containing the whole history of writings (Reddit posts) for a set of users. These users are annotated depending on whether they have explicitly mentioned having been diagnosed with anorexia (positive users) or not (negative or control users). In the test stage, systems are asked to determine, as soon as possible, whether a new user is at risk of suffering from anorexia according to the user's writing history. In particular, for each new message of a user, systems must determine whether the user is positive or negative. Once a user is labelled as positive, the decision is considered to be final, and hence all subsequent labels assigned to this user are ignored. Systems must also assign, after each message, a score measuring the user's risk of suffering from anorexia. This score is considered for evaluation purposes even after a user has been labelled as positive.</p><p>The statistics of the test dataset used for evaluating systems participating in this task are shown in Table <ref type="table" target="#tab_0">1</ref>.</p><p>System evaluation is conducted using two different paradigms: decision-based evaluation and ranking-based evaluation. Complete information about the employed metrics can be found in <ref type="bibr" target="#b22">[23]</ref>.</p><p>• Decision-based evaluation: This type of evaluation only attends to the label assigned by the system to each user (positive or negative), as well as the delay in determining that a positive user is indeed at risk of suffering from anorexia. For this aim, standard metrics used for classification such as precision, recall and F-Measure are combined with metrics that take into account this delay information. 
The early risk detection error metric ERDE <ref type="bibr" target="#b23">[24]</ref> is also used, although its values have low interpretability. To overcome this, other metrics regarding the latency and speed in detecting true positives are also proposed, and a final latency-weighted F1 measure is computed by weighting the F-Measure with these delay-related metrics.</p><p>• Ranking-based evaluation: The score assigned to each user by the system, after analyzing each received message, is used in this evaluation for computing ranking-based metrics. That is, users are ranked after 𝐾 messages according to this score, and then standard ranking metrics such as 𝑃@𝐾 and 𝑁𝐷𝐶𝐺@𝐾 are applied for measuring the performance of the systems.</p><p>Finally, the lapse of time employed by the system for processing the whole test dataset is also measured and reported, in order to illustrate the efficiency of the proposed systems.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Proposed System</head><p>The system developed for performing early detection of signs of anorexia is presented in this Section. In particular, the different components that constitute the complete system pipeline are enumerated and described in detail. The main differences with the original research, based on dataset relabelling and approximate nearest neighbors techniques, presented in <ref type="bibr" target="#b4">[5]</ref>, are the use of a contrastive learning technique for fine-tuning the embedding representations of the user's messages (Section 4.3), as well as the development of a set of heuristics for considering previous messages for the final classification, instead of only taking into account the last message received (Section 4.4).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Data representation</head><p>The encoder used in this work for obtaining embeddings representing each of the messages of a particular user is the Universal Sentence Encoder <ref type="bibr" target="#b24">[25]</ref>. Through its use, all messages in the training dataset are transformed into 512-dimensional embeddings. The specific model used in the encoding is based on a Deep Average Network (DAN) <ref type="bibr" target="#b25">[26]</ref>, trained on different sources of data written in English, and normally used for generating vector representations of texts longer than words, i.e., sentences, phrases or short paragraphs.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Relabelling process</head><p>The relabelling process has been described in previous works <ref type="bibr" target="#b4">[5,</ref><ref type="bibr" target="#b6">7]</ref>. Its main objective is to generate a training dataset labelled at message-level, starting from the user-level annotation provided by the organizers. The intuition behind this decision, already tested in previous eRisk competitions devoted to detecting pathological gambling, is that message-level annotations can help the system to emit accurate alerts about the risk of a user of suffering from anorexia by analyzing the user's individual messages.</p><p>In this stage a technique for generating indexes based on approximate nearest neighbors (ANN) is applied, this way creating a data structure that allows us to obtain the 𝑁 most similar messages to a specific one. Two different ANN approaches have been explored in this work: first, Annoy <ref type="bibr" target="#b26">[27]</ref> is a partitioning method based on the use of hyperplanes that recursively divide the search space with random direction. The generated index has the shape of a binary tree, and through its use the most similar elements to a query can be easily retrieved. On the other hand, the Hierarchical Navigable Small World (HNSW) method, implemented by the Non-Metric Space Library (NMSLIB) <ref type="bibr" target="#b27">[28]</ref> is a graph-based ANN technique. In this case, the search index has the form of a proximity graph in which nodes correspond to particular instances (in our case, messages), and edges define the neighborhood relationship. The main idea behind the use of this technique is that a neighbor's neighbor is likely to also be a neighbor of a particular instance. 
Nearest neighbor retrieval is then performed by using a best-first search strategy on the graph.</p><p>Once that the selected index has been built on all the messages composing the training dataset, we are able to retrieve all the desired nearest neighbors given a particular message. In the first iteration of the relabelling process, all messages are labelled as belonging to the same class (positive or negative) as the user that created them. Then, for each positive message 𝑀 in the training dataset, a set of its 𝐾 nearest neighbors is retrieved from the index. The message will be relabelled as negative only if at least 𝐽 of those 𝐾 nearest messages belong to the negative class. In our implementation, only positive messages can be relabelled as negative. This is due to the fact that only positive users can have negative messages, because if negative users had any positive message they would have been labelled as positive. Only messages containing title information, this is, messages representing the opening of a Reddit thread, are taken into account for generating our training dataset. This filtering allows us to focus on discussions originally initiated by the analyzed user, which are more likely to contain information about particular worries or calls for help from the user. Moreover, this also reduces the computational complexity of the system, while the final results do not significantly differ from those obtained by using the complete set of messages. The relabelling step is iteratively repeated until convergence is reached, this is, no new relabellings are done during an iteration. A random sample of 33% of the users in the original training dataset is employed for validation purposes, allowing us to explore the optimal values of the 𝐾 and 𝐽 parameters. Through this validation step, these values have been set to 𝐾 = 10 and 𝐽 = 6.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Contrastive Learning</head><p>After completing the relabelling process, we propose an additional technique in the encoding step of our system based on fine-tuning the generated embeddings representing the different messages. This fine-tuning relies on a contrastive learning technique <ref type="bibr" target="#b28">[29]</ref>, a method employed for maximizing the distance between embeddings of messages belonging to different classes and minimizing it when the messages belong to the same class. In particular, in our system this is achieved by retraining the Universal Sentence Encoder used for generating the initial representations of the messages. However, during this retraining, we employ a particular type of loss function, known as triplet loss <ref type="bibr" target="#b29">[30]</ref>. For each message in the training dataset, either labelled as positive or negative, a triplet (𝑎, 𝑝, 𝑛) is created, being 𝑎 the original message, 𝑝 a message belonging to the same class, and 𝑛 a message belonging to the opposite class. The triplet loss function used in our retraining is ℒ = 𝑚𝑎𝑥(𝑑(𝑎, 𝑝) − 𝑑(𝑎, 𝑛) + 𝛼, 0), where 𝑑 is a function measuring the distance between the generated embeddings. The distance function employed for this work is cosine distance. This implies that the main aim of the training process will be to minimize the distance between messages belonging to the same class and maximize the distance between messages belonging to different classes. An additional parameter 𝛼 is included into the loss function in order to determine the minimum desired distance between positive and negative instances, considering 𝑎 as reference instance.</p><p>The main idea behind the contrastive learning process is illustrated in Figure <ref type="figure" target="#fig_0">1</ref>. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.4.">Final classification</head><p>Once the representation of the text messages is refined using contrastive learning techniques, the final classification step is somewhat similar to the relabelling process described in Section 4.2. However, some additional heuristics have been added to this stage in order to consider more than one individual message for determining whether a user is at risk of suffering from anorexia.</p><p>Two new 𝐾 and 𝐽 parameters are calculated in this step for performing the final classification. Each time a new message 𝑀 is received, the 𝐾 nearest neighbors are retrieved. If at least 𝐽 of those 𝐾 neighbors are positive, the message, and hence the user, is directly classified as positive. Through the use of the aforementioned validation split, the values of these parameters have been set to 𝐾 = 19 and 𝐽 = 19 for the classification step.</p><p>As previously mentioned, we are also interested in analyzing whether the history of previous messages from the user can be useful for performing a more accurate classification. With this purpose, we have explored in more depth how assigning risk scores to the user after analyzing each message can affect the final classification. Besides the classification of the user as positive or negative, and regarding the ranking-based evaluation, a score is expected to be assigned to the user after receiving each message, representing the user's risk of suffering from anorexia. In our system, this score is computed by calculating the average distance between a received message 𝑀 and all its nearest neighbors labelled as positive, $val = \frac{1}{k} \sum_{x=1}^{k} distance(U_x, M)$, where $U_x$ is a message within the set of 𝐾 nearest neighbors that is labelled as positive and $k$ is the number of such positive neighbors. The distance function employed returns values between 0 and 2, and hence the scoring assigned to the user is 𝑠𝑐𝑜𝑟𝑒 = (2 − 𝑣𝑎𝑙). 
This way, a message really close to its positive neighbors would receive a distance value of 𝑣𝑎𝑙 ≈ 0 and hence its score would be 𝑠𝑐𝑜𝑟𝑒 ≈ 2. This score is calculated for test messages classified as positive, but also for those classified as negative, and a buffer containing the scores of the 𝑁 previous messages from the user is stored. The buffer is originally filled with zeros. Hence, if the system initially classifies a message as negative, the average score value for the last 𝑁 messages is calculated, and the message (and user) will be classified as positive if this average is over a particular threshold 𝑆. The optimal values of 𝑁 and 𝑆 (this is, the message window considered and the score threshold) are also determined using the validation split and vary depending on the submitted run (see Section 5.1).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Results and Discussion</head><p>Main results achieved by the proposed system are presented in this Section. Experiments using the validation split are first depicted in order to justify the configurations selected for the submitted runs. Only decision-based evaluation, and more particularly, latency-weighted F1 values, were taken into account for tuning the hyperparameters through the validation split. Then, results obtained on the test dataset by the 5 different configurations selected are shown.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.1.">Validation and Selected Runs</head><p>As previously mentioned, a random split of 33% of the users in the training dataset is employed for validation purposes. Through these experiments we have confirmed that the use of the contrastive learning technique is able to improve all previous results obtained when using the Universal Sentence Encoder with no modifications for generating the embeddings. In particular, the latency-weighted F1 value of the best performing configuration that uses the original encoder is around 6% lower than the best performing system in our validation process. For this reason, we decided to use the contrastive learning encoder in all the submitted runs. In general, applying the relabelling method also improves the results with respect to not using it (this is, labelling all messages from a positive user as positive and all messages from a negative user as negative). However, we included a run that does not perform any relabelling in the test configurations, in order to compare results. The remaining parameters (values 𝐾 and 𝐽 in either relabelling or classification, and values 𝑁 and 𝑆) have been adjusted by selecting the best performing configurations in the validation phase. As already stated, values of 𝐾 = 10 and 𝐽 = 6 during relabelling and 𝐾 = 19 and 𝐽 = 19 during classification showed the best results in this stage.</p><p>Table <ref type="table" target="#tab_2">2</ref> shows the configurations of the proposed system, for each of the five runs allowed to be submitted in the task.</p><p>Column "ANN system" indicates the technique employed for building the nearest neighbor index: Annoy or NMSLIB. The type of encoder employed is always the one that refines the Universal Sentence Encoding with contrastive learning (CL_USE). 
Column "Relabel" indicates whether the relabelling step has been followed or not, while column "Heuristics" shows values for parameters 𝑁 (window size) and 𝑆 (decision threshold) in case the rules described in Section 4.4 have been employed, and "None" otherwise. It can be noticed how the best value for parameter 𝑆 is always set to 1.0, this is, half the maximum scoring value that the average score for the 𝑁 last messages can reach. Finally, we can observe how the latency-weighted F1 metric is quite similar in this validation for all the proposed configurations, except for R3, which does not include the relabelling step.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.2.">Test results</head><p>The following tables illustrate the main results achieved by our system regarding the two types of evaluations considered, as well as the comparison with the other teams participating in the task. In particular, Table <ref type="table">3</ref> shows results according to the decision-based evaluation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 3</head><p>Test results: Results of the decision-based evaluation for task T2. Bold indicates the best result for each considered metric.</p><p>9) COS-470-Team-2 0 0.00 0.00 0.00 0.12 0.12 (10)</p><p>As the table shows, all the configurations proposed for our system are able to outperform all participating systems in terms of latency-weighted F1. In particular, our best performing run, R1, is 9 percentage points ahead of the second best performing team. Although some other teams obtain slightly better results regarding precision and recall, the F1 and latency-weighted F1 values show that our proposal is the most robust across the considered metrics. Our system also obtains good results for some of the early risk detection metrics. In particular, it achieves the third best ERDE5 and second best ERDE50 values, although the latency and speed values are somewhat worse. It is particularly noticeable how all the proposed runs are able to obtain good results. This probably indicates that the main improvement proposed, which is the use of a contrastive learning technique for refining the embeddings representing text messages, has a powerful impact on the performance of our system. On the other hand, the use of heuristics for increasing the amount of information considered before classifying a message does not seem to have that much impact on the final results. However, in the validation stage we have observed that when contrastive learning is not performed on the original embeddings, the use of these heuristics does positively influence the results. Therefore, future efforts should be focused on improving these rules.</p><p>Table <ref type="table">4</ref> shows the main results on the ranking-based evaluation. Once again, our system ranks first in this type of evaluation, for almost all the considered metrics, and for any of the proposed configurations. 
In particular, we are able to achieve perfect scores for 𝑃@10 and 𝑁𝐷𝐶𝐺@10 after receiving 1, 100, 500 and 1000 messages, and the best results for 𝑁𝐷𝐶𝐺@100 after receiving 100, 500 and 1000 messages.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 4</head><p>Test results: Results of the ranking-based evaluation for task T2. Bold indicates the best result for each considered metric.</p><p>1 writing 100 writings 500 writings 1000 writings P@10 NDCG@10 NDCG@100 P@10 NDCG@10 NDCG@100 P@10 NDCG@10 NDCG@100 P@10 NDCG@10 NDCG@100 NLP-UNED R0 1.00 1.00 0.44 1.00 1.00 0.89 1.00 1.00 0.91 1.00 1.00 0.91 NLP-UNED R1 1.00 1.00 0.44 1.00 1.00 0.89 1.00 1.00 0.92 1.00 1.00 0.92 NLP-UNED R2 1.00 1.00 0.44 1.00 1.00 0.89 1.00 1.00 0.91 1.00 1.00 0.91 NLP-UNED R3 1.00 1.00 0.45 1.00 1.00 0.91 1.00 1.00 0.91 1.00 1.00 0.91 NLP-UNED R4 1.00 1.00 0.44 1.00 1.00 0.89 1.00 1.00 0.91 1.00 1.00 0.91 UNSL R1 1.00 1.00 0.69 1.00 1.00 0.80 0.90 0.81 0.69 0.80 0.88 0.72 Riewe-Perla R0 0.50 0.47 0.17 0.70 0.62 0.74 0.70 0.62 0.74 0.70 0.62 0.75 GVIS R1 0.40 0.37 0.40 0.30 0.32 0.42 0.00 0.00 0.00 0.00 0.00 0.00 ELiRF-UPV R0 0.20 0.12 0.14 0.20 0.13 0.14 0.20 0.13 0.14 0.20 0.13 0.14 UMUTeam R1 0.20 0.12 0.14 0.10 0.06 0.03 0.00 0.00 0.05 0.20 0.21 0.12 BioNLP-IISERB R4 0.20 0.21 0.10 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 APB-UC3M R0 0.00 0.00 0.03 0.40 0.56 0.26 0.00 0.00 0.09 0.00 0.00 0.13 SINAI R3 0.00 0.00 0.07 0.10 0.07 0.06 0.00 0.00 0.07 0.00 0.00 0.07 COS-470-Team-2 R0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00</p><p>Only the UNSL team is able to beat our system for the 𝑁𝐷𝐶𝐺@100 after seeing only the first message of each user. Together with our latency and speed values in the decision-based evaluation, this fact indicates that our system could be improved in terms of speed in finding true positives, that is, in determining that a user is at risk of suffering from anorexia.</p><p>Finally, Table <ref type="table">5</ref> shows some information regarding the number of runs submitted by the participating teams, the number of total writings processed by each team, and the total time employed in processing the messages.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 5</head><p>Participating teams, number of runs, number of user writings processed by the team, and lapse of time taken for the entire process. Compared to the other participating systems that processed the complete set of user writings, our system is the third best performing regarding execution times, the time interval being in the order of hours, in a similar manner to the best performing teams.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Team</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.">Conclusions and Future Work</head><p>This paper presents our participation in Task 2 of the CLEF eRisk 2024 competition: Early Detection of Signs of Anorexia. The developed system is a new version of the system designed for previous editions of the competition, in which a relabelling method based on the use of approximate nearest neighbors (ANN) is applied on the training dataset, and the same ANN techniques are then used for classifying new messages and determining whether a user is at risk of suffering from a mental problem, in this case anorexia. The new improvements incorporated into the system are the use of contrastive learning techniques for fine-tuning the embeddings of the text messages, initially generated through a Universal Sentence Encoder, and an increase in the amount of information employed for classification by including a set of rules or heuristics that consider a message window of 𝑁 previous messages. The developed system is able to obtain the best results among the participating systems in terms of F-Measure and latency-weighted F1 (decision-based evaluation), as well as in terms of ranking-based evaluation metrics. In particular, all the tested configurations of the system outperform the second best participating team by around 9% in latency-weighted F1. In general, the main results indicate that the refinement of the vector representations obtained through contrastive learning techniques has been crucial for a better discrimination between positive and negative messages, thus leading the system to effectively determine when a message may indicate that the user is at risk of suffering from anorexia. 
On the other hand, expanding the message window considered for performing the final classification has not shown significant impact on the test results, although during the validation stage those configurations using these heuristics were able to obtain better overall results with respect to configurations only using one message for making a decision.</p><p>As mentioned in Section 5.1, future lines of work should focus on improving the rules designed for considering the history of messages before classifying a user. A trade-off must be found between the latency (that is, the number of messages analyzed before emitting an alert) and the amount of information that should be gathered before making a decision. Also, the treatment of these previous messages can be improved: for instance, the current rules underestimate the weight of similar positive messages when few messages have been received, since the buffer of previous scores is initialized with zeros. This implies that even if a message is quite similar to positive messages its score is going to decrease when it is one of the first analyzed messages for a user. The current decision of selecting only the nearest positive messages for calculating the score can also be detrimental for the final results. More research should be done on the type of functions that better model the similarity of a given message with both positive and negative nearest neighbors, and its influence on the classification decision.</p><p>An additional future line of research involves further refinement of the embeddings used for representing users' messages. In particular, the hyperparameters used in the contrastive learning phase, described in Section 4.3, can be studied in greater depth through validation techniques, in order to search for optimal values. 
Additionally, different encoding models beyond the Universal Sentence Encoder could be also considered, exploring issues such as multilingualism or models that have already used contrastive learning techniques in their original training, like E5 <ref type="bibr" target="#b30">[31]</ref>.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Contrastive learning with triplet loss: training is oriented to maximizing the distance between same class (𝑝) and opposite class (𝑛) instances with respect to a given anchor instance (𝑎).</figDesc><graphic coords="5,170.14,253.21,255.00,117.90" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Main statistics of test collection for task 2: Early detection of signs of anorexia.</figDesc><table><row><cell>Anorexia Control</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Avg num. of days from first to last submission ≈ 482 ≈ 971 Avg num. words per submission</head><label></label><figDesc></figDesc><table><row><cell>28.5</cell><cell>21.4</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 2</head><label>2</label><figDesc>Validation results: Configurations selected for the test phase.</figDesc><table><row><cell cols="4">Run ANN system Encoder Relabel</cell><cell>Heuristics</cell><cell>Latency-weighted F1 (validation)</cell></row><row><cell>R0</cell><cell>Annoy</cell><cell>CL_USE</cell><cell>YES</cell><cell>𝑁 = 7, 𝑆 = 1.0</cell><cell>0.6967</cell></row><row><cell>R1</cell><cell>Annoy</cell><cell>CL_USE</cell><cell>YES</cell><cell>None</cell><cell>0.6862</cell></row><row><cell>R2</cell><cell>Annoy</cell><cell>CL_USE</cell><cell>YES</cell><cell>𝑁 = 5, 𝑆 = 1.0</cell><cell>0.6863</cell></row><row><cell>R3</cell><cell>Annoy</cell><cell>CL_USE</cell><cell>NO</cell><cell>None</cell><cell>0.6506</cell></row><row><cell>R4</cell><cell>NMSLIB</cell><cell>CL_USE</cell><cell>YES</cell><cell>𝑁 = 7, 𝑆 = 1.0</cell><cell>0.6915</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>#Runs #User Writings Processed Lapse of Time (from 1st to last response)</head><label></label><figDesc></figDesc><table><row><cell>BioNLP-IISERB</cell><cell>5</cell><cell>10</cell><cell>09:39</cell></row><row><cell>GVIS</cell><cell>5</cell><cell>352</cell><cell>3 days 12:36</cell></row><row><cell>Riewe-Perla</cell><cell>5</cell><cell>2001</cell><cell>2 days 11:25</cell></row><row><cell>UNSL</cell><cell>3</cell><cell>2001</cell><cell>07:00</cell></row><row><cell>UMUTeam</cell><cell>5</cell><cell>2001</cell><cell>06:34</cell></row><row><cell>COS-470-Team-2</cell><cell>5</cell><cell>1</cell><cell>-</cell></row><row><cell>ELiRF-UPV</cell><cell>4</cell><cell>2001</cell><cell>12:27</cell></row><row><cell>NLP-UNED</cell><cell>5</cell><cell>2001</cell><cell>09:40</cell></row><row><cell>SINAI</cell><cell>5</cell><cell>2001</cell><cell>3 days 23:49</cell></row><row><cell>APB-UC3M</cell><cell>2</cell><cell>2001</cell><cell>6 days 21:34</cell></row></table></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>This work has been partially supported by the Spanish Ministry of Science and Innovation within the DOTT-HEALTH Project (MCI/AEI/FEDER, UE) under Grant PID2019-106942RB-C32, OBSER-MENH Project (MCIN/AEI/10.13039/501100011033 and European Union "NextGenerationEU"/PRTR) under Grant TED2021-130398B-C21 and EDHER-MED Project under grant PID2022-136522OB-C21, as well as by the Universidad Nacional de Educación a Distancia (UNED) within project SICAMESP (2023-VICE-0029).</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Overview of erisk 2024: Early risk prediction on the internet</title>
		<author>
			<persName><forename type="first">J</forename><surname>Parapar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Martín Rodilla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction. 15th International Conference of the CLEF Association, CLEF 2024</title>
				<meeting><address><addrLine>Grenoble, France</addrLine></address></meeting>
		<imprint>
			<publisher>Springer International</publisher>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Overview of erisk 2024: Early risk prediction on the internet (extended overview)</title>
		<author>
			<persName><forename type="first">J</forename><surname>Parapar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Martín Rodilla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum CLEF 2024</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<meeting><address><addrLine>Grenoble, France</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2024">September 9th to 12th, 2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Overview of erisk: Early risk prediction on the internet (extended lab overview)</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Parapar</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-2125/invited_paper_1.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2018 -Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Nie</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">L</forename><surname>Soulier</surname></persName>
		</editor>
		<meeting><address><addrLine>Avignon, France</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2018">September 10-14, 2018</date>
			<biblScope unit="volume">2125</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Overview of erisk at CLEF 2019: Early risk prediction on the internet (extended overview)</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Parapar</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-2380/paper_248.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2019 -Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Müller</surname></persName>
		</editor>
		<meeting><address><addrLine>Lugano, Switzerland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">September 9-12, 2019</date>
			<biblScope unit="volume">2380</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">UNED-NLP at erisk 2022: Analyzing gambling disorders in social media using approximate nearest neighbors</title>
		<author>
			<persName><forename type="first">H</forename><surname>Fabregat</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Duque</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Araujo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Martínez-Romo</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-3180/paper-71.pdf" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Working Notes of CLEF 2022 -Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Hanbury</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</editor>
		<meeting>the Working Notes of CLEF 2022 -Conference and Labs of the Evaluation Forum<address><addrLine>Bologna, Italy</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2022">September 5th to 8th, 2022</date>
			<biblScope unit="volume">3180</biblScope>
			<biblScope unit="page" from="894" to="904" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">NLP-UNED-2 at erisk 2023: Detecting pathological gambling in social media through dataset relabeling and neural networks</title>
		<author>
			<persName><forename type="first">H</forename><surname>Fabregat</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Duque</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Araujo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Martínez-Romo</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-3497/paper-056.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2023)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">M</forename><surname>Aliannejadi</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Vlachos</surname></persName>
		</editor>
		<meeting><address><addrLine>Thessaloniki, Greece</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2023">September 18th to 21st, 2023</date>
			<biblScope unit="volume">3497</biblScope>
			<biblScope unit="page" from="672" to="683" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">A re-labeling approach based on approximate nearest neighbors for identifying gambling disorders in social media</title>
		<author>
			<persName><forename type="first">H</forename><surname>Fabregat</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Duque</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Araujo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Martínez-Romo</surname></persName>
		</author>
		<idno type="DOI">10.1007/978-3-031-42448-9_15</idno>
		<ptr target="https://doi.org/10.1007/978-3-031-42448-9_15" />
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction -14th International Conference of the CLEF Association, CLEF 2023</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<editor>
			<persName><forename type="first">A</forename><surname>Arampatzis</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Kanoulas</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">T</forename><surname>Tsikrika</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">S</forename><surname>Vrochidis</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Giachanou</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><surname>Li</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Aliannejadi</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Vlachos</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<meeting><address><addrLine>Thessaloniki, Greece</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2023">September 18-21, 2023</date>
			<biblScope unit="volume">14163</biblScope>
			<biblScope unit="page" from="174" to="185" />
		</imprint>
	</monogr>
	<note>Proceedings</note>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Overview of erisk 2023: Early risk prediction on the internet</title>
		<author>
			<persName><forename type="first">J</forename><surname>Parapar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Martín Rodilla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction. 14th International Conference of the CLEF Association, CLEF 2023</title>
				<meeting><address><addrLine>Thessaloniki, Greece</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Overview of the CLPsych 2024 shared task: Leveraging large language models to identify evidence of suicidality risk in online posts</title>
		<author>
			<persName><forename type="first">J</forename><surname>Chim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Tsakalidis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Gkoumas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Atzil-Slonim</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Ophir</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Zirikly</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Resnik</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Liakata</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2024.clpsych-1.15" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024), Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">A</forename><surname>Yates</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Desmet</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Prud'hommeaux</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Zirikly</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">S</forename><surname>Bedrick</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">S</forename><surname>Macavaney</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">K</forename><surname>Bar</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Ireland</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Y</forename><surname>Ophir</surname></persName>
		</editor>
		<meeting>the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024), Association for Computational Linguistics<address><addrLine>St. Julians, Malta</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2024">2024</date>
			<biblScope unit="page" from="177" to="190" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Overview of mentalriskes at iberlef 2023: Early detection of mental disorders risk in spanish</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">M</forename><surname>Mármol-Romero</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Moreno-Muñoz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">M P</forename><surname>Del Arco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">D</forename><surname>Molina-González</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">T M</forename><surname>Valdivia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">A U</forename><surname>López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Montejo-Ráez</surname></persName>
		</author>
		<ptr target="http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6564" />
	</analytic>
	<monogr>
		<title level="j">Proces. del Leng. Natural</title>
		<imprint>
			<biblScope unit="volume">71</biblScope>
			<biblScope unit="page" from="329" to="350" />
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Mentalriskes: A new corpus for early detection of mental disorders in spanish</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">M</forename><surname>Mármol-Romero</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Moreno-Muñoz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">M P</forename><surname>Del Arco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">D</forename><surname>Molina-González</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">T M</forename><surname>Valdivia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">A U</forename><surname>López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Montejo-Ráez</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2024.lrec-main.978" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, LREC/COLING 2024</title>
				<editor>
			<persName><forename type="first">N</forename><surname>Calzolari</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Kan</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">V</forename><surname>Hoste</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Lenci</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">S</forename><surname>Sakti</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Xue</surname></persName>
		</editor>
		<meeting>the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, LREC/COLING 2024<address><addrLine>Torino, Italy</addrLine></address></meeting>
		<imprint>
			<publisher>ELRA and ICCL</publisher>
			<date type="published" when="2024-05-25">20-25 May, 2024</date>
			<biblScope unit="page" from="11204" to="11214" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Anorexia nervosa: definition, epidemiology, and cycle of risk</title>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">M</forename><surname>Bulik</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Reba</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A.-M</forename><surname>Siega-Riz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Reichborn-Kjennerud</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">International Journal of Eating Disorders</title>
		<imprint>
			<biblScope unit="volume">37</biblScope>
			<biblScope unit="page" from="S2" to="S9" />
			<date type="published" when="2005">2005</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Quick and (maybe not so) easy detection of anorexia in social media posts</title>
		<author>
			<persName><forename type="first">E</forename><surname>Mohammadi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Amini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Kosseim</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-2380/paper_74.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2019 -Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Müller</surname></persName>
		</editor>
		<meeting><address><addrLine>Lugano, Switzerland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">September 9-12, 2019</date>
			<biblScope unit="volume">2380</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Attentive multi-stage learning for early risk detection of signs of anorexia and self-harm on social media</title>
		<author>
			<persName><forename type="first">W</forename><surname>Ragheb</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Azé</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Bringay</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Servajean</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-2380/paper_126.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2019 -Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Müller</surname></persName>
		</editor>
		<meeting><address><addrLine>Lugano, Switzerland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">September 9-12, 2019</date>
			<biblScope unit="volume">2380</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">UNSL at erisk 2019: a unified approach for anorexia, self-harm and depression detection in social media</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">G</forename><surname>Burdisso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Errecalde</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Montes-y-Gómez</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-2380/paper_103.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2019 - Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Müller</surname></persName>
		</editor>
		<meeting><address><addrLine>Lugano, Switzerland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">September 9-12, 2019</date>
			<biblScope unit="volume">2380</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">INAOE-CIMAT at erisk 2019: Detecting signs of anorexia using fine-grained emotions</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">E</forename><surname>Aragón</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">P</forename><surname>López-Monroy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Montes-y-Gómez</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-2380/paper_113.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2019 - Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Müller</surname></persName>
		</editor>
		<meeting><address><addrLine>Lugano, Switzerland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">September 9-12, 2019</date>
			<biblScope unit="volume">2380</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Ltl-inaoe&apos;s participation at erisk 2019: Detecting anorexia in social media through shared personal information</title>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">M</forename><surname>Ortega-Mendoza</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">I H</forename><surname>Farías</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Montes-y-Gómez</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org/Vol-2380/paper_75.pdf" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2019 - Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Müller</surname></persName>
		</editor>
		<meeting><address><addrLine>Lugano, Switzerland</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">September 9-12, 2019</date>
			<biblScope unit="volume">2380</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Dimensionality reduction by learning an invariant mapping</title>
		<author>
			<persName><forename type="first">R</forename><surname>Hadsell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Chopra</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>LeCun</surname></persName>
		</author>
		<idno type="DOI">10.1109/CVPR.2006.100</idno>
		<ptr target="https://doi.org/10.1109/CVPR.2006.100" />
	</analytic>
	<monogr>
		<title level="m">IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR 2006)</title>
				<meeting><address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>IEEE Computer Society</publisher>
			<date type="published" when="2006-06">June 2006</date>
			<biblScope unit="page" from="1735" to="1742" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Contrastive representation learning: A framework and review</title>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">H</forename><surname>Le-Khac</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Healy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">F</forename><surname>Smeaton</surname></persName>
		</author>
		<idno type="DOI">10.1109/ACCESS.2020.3031549</idno>
		<ptr target="https://doi.org/10.1109/ACCESS.2020.3031549" />
	</analytic>
	<monogr>
		<title level="j">IEEE Access</title>
		<imprint>
			<biblScope unit="volume">8</biblScope>
			<biblScope unit="page" from="193907" to="193934" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">A simple framework for contrastive learning of visual representations</title>
		<author>
			<persName><forename type="first">T</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kornblith</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Norouzi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">E</forename><surname>Hinton</surname></persName>
		</author>
		<ptr target="http://proceedings.mlr.press/v119/chen20j.html" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 37th International Conference on Machine Learning, ICML 2020</title>
				<meeting>the 37th International Conference on Machine Learning, ICML 2020</meeting>
		<imprint>
			<publisher>PMLR</publisher>
			<date type="published" when="2020-07">July 2020</date>
			<biblScope unit="volume">119</biblScope>
			<biblScope unit="page" from="1597" to="1607" />
		</imprint>
	</monogr>
	<note>Proceedings of Machine Learning Research</note>
</biblStruct>

<biblStruct xml:id="b20">
	<monogr>
		<title level="m" type="main">Simcse: Simple contrastive learning of sentence embeddings</title>
		<author>
			<persName><forename type="first">T</forename><surname>Gao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Yao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Chen</surname></persName>
		</author>
		<idno>CoRR abs/2104.08821</idno>
		<idno type="arXiv">arXiv:2104.08821</idno>
		<ptr target="https://arxiv.org/abs/2104.08821" />
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<monogr>
		<title level="m" type="main">Representation learning with contrastive predictive coding</title>
		<author>
			<persName><forename type="first">A</forename><surname>van den Oord</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Vinyals</surname></persName>
		</author>
		<idno>CoRR abs/1807.03748</idno>
		<idno type="arXiv">arXiv:1807.03748</idno>
		<ptr target="http://arxiv.org/abs/1807.03748" />
		<imprint>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Overview of erisk at CLEF 2021: Early risk prediction on the internet (extended overview)</title>
		<author>
			<persName><forename type="first">J</forename><surname>Parapar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Martín-Rodilla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</author>
		<ptr target="http://ceur-ws.org/Vol-2936/paper-72.pdf" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Working Notes of CLEF 2021 - Conference and Labs of the Evaluation Forum</title>
				<meeting>the Working Notes of CLEF 2021 - Conference and Labs of the Evaluation Forum<address><addrLine>Bucharest, Romania</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2021">2021</date>
			<biblScope unit="volume">2936</biblScope>
			<biblScope unit="page" from="864" to="887" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">A test collection for research on depression and language use</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">E</forename><surname>Losada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</author>
		<idno type="DOI">10.1007/978-3-319-44564-9_3</idno>
		<ptr target="https://doi.org/10.1007/978-3-319-44564-9_3" />
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction - 7th International Conference of the CLEF Association, CLEF 2016</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<editor>
			<persName><forename type="first">N</forename><surname>Fuhr</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Quaresma</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">T</forename><surname>Gonçalves</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Larsen</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">K</forename><surname>Balog</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">C</forename><surname>Macdonald</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">L</forename><surname>Cappellato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<meeting><address><addrLine>Évora, Portugal</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2016">September 5-8, 2016</date>
			<biblScope unit="volume">9822</biblScope>
			<biblScope unit="page" from="28" to="39" />
		</imprint>
	</monogr>
	<note>Proceedings</note>
</biblStruct>

<biblStruct xml:id="b24">
	<monogr>
		<title level="m" type="main">Universal sentence encoder</title>
		<author>
			<persName><forename type="first">D</forename><surname>Cer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Hua</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Limtiaco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">S</forename><surname>John</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Constant</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Guajardo-Cespedes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Yuan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Tar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Sung</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Strope</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Kurzweil</surname></persName>
		</author>
		<idno>CoRR abs/1803.11175</idno>
		<idno type="arXiv">arXiv:1803.11175</idno>
		<ptr target="http://arxiv.org/abs/1803.11175" />
		<imprint>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">Deep unordered composition rivals syntactic methods for text classification</title>
		<author>
			<persName><forename type="first">M</forename><surname>Iyyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Manjunatha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Boyd-Graber</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Daumé</surname><genName>III</genName></persName>
		</author>
		<idno type="DOI">10.3115/v1/P15-1162</idno>
		<ptr target="https://aclanthology.org/P15-1162" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing</title>
		<title level="s">Long Papers</title>
		<meeting>the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing<address><addrLine>Beijing, China</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2015">2015</date>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="1681" to="1691" />
		</imprint>
	</monogr>
	<note>Association for Computational Linguistics</note>
</biblStruct>

<biblStruct xml:id="b26">
	<monogr>
		<title level="m" type="main">Annoy: Approximate Nearest Neighbors in C++/Python</title>
		<author>
			<persName><forename type="first">E</forename><surname>Bernhardsson</surname></persName>
		</author>
		<ptr target="https://pypi.org/project/annoy/" />
		<imprint>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
	<note>Python package version 1.13.0</note>
</biblStruct>

<biblStruct xml:id="b27">
	<monogr>
		<title level="m" type="main">Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs</title>
		<author>
			<persName><forename type="first">Y</forename><forename type="middle">A</forename><surname>Malkov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">A</forename><surname>Yashunin</surname></persName>
		</author>
		<idno>CoRR abs/1603.09320</idno>
		<idno type="arXiv">arXiv:1603.09320</idno>
		<ptr target="http://arxiv.org/abs/1603.09320" />
		<imprint>
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b28">
	<analytic>
		<title level="a" type="main">A primer on contrastive pretraining in language processing: Methods, lessons learned, and perspectives</title>
		<author>
			<persName><forename type="first">N</forename><surname>Rethmeier</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Augenstein</surname></persName>
		</author>
		<idno type="DOI">10.1145/3561970</idno>
		<ptr target="https://doi.org/10.1145/3561970" />
	</analytic>
	<monogr>
		<title level="j">ACM Comput. Surv</title>
		<imprint>
			<biblScope unit="volume">55</biblScope>
			<biblScope unit="page">17</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b29">
	<analytic>
		<title level="a" type="main">Distance metric learning for large margin nearest neighbor classification</title>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">Q</forename><surname>Weinberger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Blitzer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">K</forename><surname>Saul</surname></persName>
		</author>
		<ptr target="https://proceedings.neurips.cc/paper/2005/hash/a7f592cef8b130a6967a90617db5681b-Abstract.html" />
	</analytic>
	<monogr>
		<title level="m">Advances in Neural Information Processing Systems 18</title>
				<meeting><address><addrLine>Vancouver, British Columbia, Canada</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2005">December 5-8, 2005</date>
			<biblScope unit="page" from="1473" to="1480" />
		</imprint>
	</monogr>
	<note>Neural Information Processing Systems, NIPS 2005</note>
</biblStruct>

<biblStruct xml:id="b30">
	<monogr>
		<author>
			<persName><forename type="first">L</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Huang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Jiao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Jiang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Majumder</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Wei</surname></persName>
		</author>
		<idno type="DOI">10.48550/ARXIV.2212.03533</idno>
		<idno type="arXiv">arXiv:2212.03533</idno>
		<ptr target="https://doi.org/10.48550/arXiv.2212.03533" />
		<title level="m">Text embeddings by weakly-supervised contrastive pre-training</title>
				<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
