<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Team Sharingans at SimpleText: Fine-Tuned LLM based approach to Scientific Text Simplification</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Syed</forename><forename type="middle">Muhammad</forename><surname>Ali</surname></persName>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Computer Science Program</orgName>
								<orgName type="department" key="dep2">Dhanani School of Science and Engineering</orgName>
								<orgName type="institution">Habib University</orgName>
								<address>
									<postCode>75290</postCode>
									<settlement>Karachi</settlement>
									<country key="PK">Pakistan</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Hammad</forename><surname>Sajid</surname></persName>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Computer Science Program</orgName>
								<orgName type="department" key="dep2">Dhanani School of Science and Engineering</orgName>
								<orgName type="institution">Habib University</orgName>
								<address>
									<postCode>75290</postCode>
									<settlement>Karachi</settlement>
									<country key="PK">Pakistan</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Owais</forename><surname>Aijaz</surname></persName>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Computer Science Program</orgName>
								<orgName type="department" key="dep2">Dhanani School of Science and Engineering</orgName>
								<orgName type="institution">Habib University</orgName>
								<address>
									<postCode>75290</postCode>
									<settlement>Karachi</settlement>
									<country key="PK">Pakistan</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Owais</forename><surname>Waheed</surname></persName>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Computer Science Program</orgName>
								<orgName type="department" key="dep2">Dhanani School of Science and Engineering</orgName>
								<orgName type="institution">Habib University</orgName>
								<address>
									<postCode>75290</postCode>
									<settlement>Karachi</settlement>
									<country key="PK">Pakistan</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Faisal</forename><surname>Alvi</surname></persName>
							<email>faisal.alvi@sse.habib.edu.pk</email>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Computer Science Program</orgName>
								<orgName type="department" key="dep2">Dhanani School of Science and Engineering</orgName>
								<orgName type="institution">Habib University</orgName>
								<address>
									<postCode>75290</postCode>
									<settlement>Karachi</settlement>
									<country key="PK">Pakistan</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Abdul</forename><surname>Samad</surname></persName>
							<email>abdul.samad@sse.habib.edu.pk</email>
							<affiliation key="aff0">
								<orgName type="department" key="dep1">Computer Science Program</orgName>
								<orgName type="department" key="dep2">Dhanani School of Science and Engineering</orgName>
								<orgName type="institution">Habib University</orgName>
								<address>
									<postCode>75290</postCode>
									<settlement>Karachi</settlement>
									<country key="PK">Pakistan</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Team Sharingans at SimpleText: Fine-Tuned LLM based approach to Scientific Text Simplification</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">57703BDF29744DB005E990E43DA69A36</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:57+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Large Language Models</term>
					<term>GPT-3.5 Turbo</term>
					<term>Elastic Search</term>
					<term>BERT</term>
					<term>Text simplification</term>
					<term>SimpleText</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>This paper reports Habib University's Team Sharingans' participation in the CLEF 2024 SimpleText track, which aims to simplify scientific texts for improved readability and comprehension for non-experts. Our goal is to use state-of-the-art language models for simple yet accurate explanations of scientific texts for the general public. Our solution is based on a multi-step approach utilizing the GPT-3.5 model to solve Tasks 1, 2, and 3 i.e. passage extraction, identification and explanation of difficult concepts, and summarization. Our approach for Task 1 involved sentence embedding-based vector database for narrowing the corpus, MS-Marco for document ranking, and GPT-3.5 for selecting informative passages. For Task 2, we fine-tuned the GPT-3.5 model to identify and explain difficult terms and generate explanations. For Task 3 also, we fine-tuned the GPT-3.5 model with a specific prompt to simplify given scientific abstracts and sentences. The effectiveness of our approach was assessed based on the quality of results, demonstrating the potential of advanced language models in making scientific education more accessible to the general public. Our solution proposes using fine-tuned large language models as a reliable source for scientific education.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Scientific literature often presents a formidable barrier to understanding for individuals outside specialized fields due to its complexity and technical language. Recognizing this challenge, the CLEF 2024 SimpleText Lab aims to enhance accessibility by simplifying scientific texts and enabling easier comprehension for a wider audience. This pursuit is divided into three tasks, each targeting different aspects of text simplification.</p><p>• Task 1: What is in (or out)? Selecting passages to include in a simplified summary <ref type="bibr" target="#b0">[1]</ref>. • Task 2: What is unclear? Difficult concept identification and explanation (definitions, abbreviation deciphering, context, applications,..) <ref type="bibr" target="#b1">[2]</ref>. Task 2.1: Extract difficult keywords from the selected paragraph. Task 2.2: Provide a brief definition of the extracted keywords. • Task 3: Rewrite this! Given a query, simplify passages from scientific abstracts <ref type="bibr" target="#b2">[3]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Literature Review</head><p>We review and analyze the approaches of the teams who participated in CLEF Simple Text 2023. Specifically, the approaches of teams whose models were among the top-scoring models in their respective tasks are discussed.</p><p>For Task 01, the Elsevier <ref type="bibr" target="#b3">[4]</ref> team fine-tuned the bi-encoder and cross-encoder ranking models for ranking documents given a query in order of their relevance. Specifically, they use the Dense Passage Retrieval model. The AIIR and LIAAD Labs <ref type="bibr" target="#b4">[5]</ref> proposed five systems for this task, including cross-encoder with and without fine-tuning, Sentence-BERT bi-encoder models, and traditional IR models like TF-IDF combined with PL2.</p><p>For Task 2.1 and Task 2.2, diverse methodologies and tools were employed. The UBO <ref type="bibr" target="#b5">[6]</ref> team utilized the pke package, along with statistical and graphical approaches such as YAKE!, TextRank, and Tf-Idf, to extract keywords from the provided sentences, and subsequently extracted definitions from Wikipedia for Task 2.2. The Sinai <ref type="bibr" target="#b6">[7]</ref> team used the GPT-3 auto-regressive model for lexical complexity prediction. They presented an approach for identifying the most challenging terms in the text which leveraged zero-shot and few-shot learning prompts to assess term difficulty.</p><p>For Task 03, the UBO <ref type="bibr" target="#b5">[6]</ref> team employed the SimpleT5 model and trained it on the datasets. Subsequently, they utilized this trained model to generate simplified text from the test dataset. They also utilized the BLOOM model, albeit requiring sample data input due to its few-shot learning nature, and similarly applied it to generate simplified text. 
AIIR and LIAAD <ref type="bibr" target="#b4">[5]</ref> team, utilized OpenAI's Davinci model with a straightforward prompt for text rewriting.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Approaches</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Task 1</head><p>For Task 01, we had • A Corpus of DBLP abstracts. An Elastic search index and a vector database with sentence embedding scores were provided through APIs for querying the corpus. • An input file containing input queries and their topic texts.</p><p>• A file containing the quality relevance scores of abstracts w.r.t topics on a scale of 0-2 for 25 topics and 64 queries. • A set of files containing the topics selected from The Guardian newspaper and Tech Xplore website along with their URLs and article content.</p><p>The approaches used for this task are given:</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.1.">MS-Marco + GPT-3.5 based re-ranking</head><p>In this approach, we utilized the vector database for querying the top 100 relevant abstracts from the corpus. To generate the query for the API, we used the query text. If the query text was a long phrase or a sentence, then the "abstracts" parameter was used in the query to search inside abstracts. In case the query text was a short phrase, the "title" parameter was used. Table <ref type="table" target="#tab_0">1</ref> shows examples of phrases and the generated queries. Then, the abstracts retrieved from the search were ranked using the "msmarco-MiniLM-L12" cross encoder w.r.t the query text as well as the topic text. The query and the topic texts were concatenated together by a period and a white space ". ". The top 10 re-ranked abstracts were provided with a fine-tuned GPT-3.5 model to select the most relevant abstract with reference to query text, and then extract the most relevant passage from the selected abstract. This two-step process is shown in Table <ref type="table">2</ref>.</p><p>The GPT-3.5 model was fine-tuned on manually curated training data. The hyperparameters are given in Table <ref type="table">3</ref>.</p><p>The training data used to fine-tune GPT-3.5 comprised several examples, each having 10 manually selected abstracts as input and a manually extracted passage as the output. Finally, the runs for this task were submitted with the run id "Sharingans_Task1_marco-GPT3". Table <ref type="table">2</ref> Prompts used for the two-step process to select the most relevant passage from the re-ranked abstracts</p><p>Step Prompt</p><p>Selecting the abstract Select the abstract which gives the most relevant definition/explanation for the following term/phrase: (list of 10 abstracts)</p><p>Extracting the passage Extract the most relevant part of abstract explaining the given term/phrase in light of the topic (topic). (abstract)</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 3</head><p>Experimental setup for GPT-3.5 Turbo for Task 1</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Model Name Examples Epochs Batch Size learning_rate_multiplier</head><p>GPT-3.5 Turbo 30 3 1 2</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.2.">Keyword extraction with RAKE and ColBert+GPT-3.5 based re-ranking</head><p>For this approach, we utilized RAKE <ref type="bibr" target="#b7">[8]</ref>, a keyword extraction algorithm, to identify relevant terms for querying the corpus. We provided RAKE with the topic and query text to extract relevant keywords from them. Then we used these terms to generate a query for the Elastic Search index, which in turn narrowed down the corpus to a subset of documents. This subset was further refined using the ColBERT neural ranker <ref type="bibr" target="#b8">[9]</ref> to choose the top 10 most relevant ones, given the topic text and the query. Finally, GPT-3.5 helped in selecting the most informative and concise passage for inclusion in the summary. We did not include runs for this approach since the MS-Marco + GPT-3.5 approach worked better which has been described above.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Task 2</head><p>For Task 02, we were provided with:</p><p>• A train file, along with some manual run files, that included the fields of the "source sentences" along with their corresponding extracted terms, definitions, difficulty, and explanations with positive and negative definitions as an indicator for what an acceptable definition should look like. • A validation file for testing the trained model with similar entries as that in the train file.</p><p>• A test dataset, having around 500 plus entries, consisting of just the source sentences for the evaluation of the model's output.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.1.">GPT-3.5 Turbo based approach</head><p>To accomplish Task 02, we fine-tuned the GPT-3.5 Turbo model on the train dataset. GPT-3.5 Turbo is an advanced language model developed by OpenAI, part of the broader GPT-3.5 series. Due to its enhanced Natural Language understanding and generation ability, we decided to use this model specifically for this task. Table <ref type="table" target="#tab_1">4</ref> represents the details of the fine-tuning of our GPT-3.5 model. The effective use of 3 epochs alongside a single batch size allowed the dataset to be passed into the model only three times, which is relatively less for such a task. However, setting a batch size of one alongside a learning rate multiplier of 2 allowed a more stable adjustment of weights. We used a unit batch size so that it has a regularizing effect to prevent our model from overfitting on the small dataset. The idea of a small batch size was to have the model learn before having to see all the data.</p><p>For this task, we observed good performance on the test set. This indicates that the mini-batch learning approach, although unconventional with a batch size of one, was effective in optimizing the model both for term extraction and for generating definitions. The small batch size and learning rate multiplier helped achieve a better generalization over the small dataset.</p><p>We passed the training dataset as a query to the GPT model, which consisted of the keywords, difficulty scores, and their definitions respectively for each sub-task to fine-tune the model. The finetuned model was then used to extract keywords from the source sentences, assign them difficulty scores, generate definitions, and store them in a data frame. 
Finally, we converted the output into a JSON file as required for the submission with the runid "Sharingans_task2.2_GPT" for both sub-tasks.</p><p>The effectiveness of this method can be attributed to the tailored approach to the specific requirements of Task 2. The model's performance validated our decision, demonstrating that even with small batches, careful tuning can achieve desirable outcomes.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 5</head><p>Sample prompt to generate definition and explanation of an extracted term</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Term Difficulty Query</head><p>Digital Assistant m Generate a definition of the term: "Digital Assistant" having the difficulty score: "m" and provide an explanation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.2.">KeyBert, Classification, and Prompt Engineering based approach</head><p>Our second approach for Task 02 included utilizing the "KeyBert Model" <ref type="bibr" target="#b9">[10]</ref> for keyword extraction, Random Forest Classification for assigning difficulties, and Prompt Engineering through Mistral-7B-Instruct-v0.3 Large Language Model (LLM). The KeyBert model leverages BERT embeddings to create/extract keywords and key phrases. We utilized it to extract keywords from the source sentences. We then used Random Forest Classification on the extracted keywords with a training and test split of 80%-20%. Through the use of Mistral-7B-Instruct-v0.3 Large Language Model (LLM), we sent requests through the Hugging Face's API to perform prompt engineering to get the required definitions as the response.</p><p>We did not submit the runs of this approach due to a major limitation of Hugging Face API that restricts the number of requests to around 500 queries which were far less than the number of terms extracted. This would result in an extremely low score in case this run was submitted.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.">Task 3 3.3.1. Data Description:</head><p>For Task 03, we were provided with:</p><p>• A parallel corpora of training data comprising of source sentences/abstracts along with their query texts and simplified versions. • Test data which included source sentences (task 3.1) and source abstracts (task 3.2) and query text for each of the sentence/abstract.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.2.">Fine-Tuned GPT-3.5 Turbo</head><p>In this approach, we used OpenAI's GPT-3.5 model, since it has great summarizing capabilities. We first experimented with fine-tuning the GPT-3.5 model, using the training data of task 3.1 and task 3.2 all together and shuffling the sentences and abstracts randomly. Then we experimented with finetuning the model for Task 3.1 and Task 3.2 separately. Utilizing the EASSE scoring <ref type="bibr" target="#b10">[11]</ref>, we found that fine-tuning the model for task 3.1 and task 3.2 separately yielded slightly better results as compared to fine-tuning the model with data for both tasks altogether, especially for task 3.2. The method to train the model for task 3.1 and task 3.2 however remained the same which is discussed below. The fine-tuning process was similar for both of the subtasks. We provided the model with a prompt to simplify the sentences/abstracts along with the sentences/abstracts, the query text, and the reference output sentences/abstracts. The hyperparameters used for fine-tuning the model are given in Table <ref type="table">6</ref> and Table <ref type="table">7</ref> for tasks 3.1 and 3.2 respectively.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 6</head><p>Experimental setup for GPT-3.5 Turbo for Task 3.1</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Model Name</head><p>Queries Epochs Batch Size learning_rate_multiplier GPT-3.5 Turbo 958 3 4 2</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 7</head><p>Experimental setup for GPT-3.5 Turbo for Task 3.2</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Model Name</head><p>Queries Epochs Batch Size learning_rate_multiplier GPT-3.5 Turbo 175 3 1 2</p><p>After training the model, we provided the same prompt with the test data (sentence/abstract and query text) to generate the simplified sentences/abstracts. These simplified sentences/abstracts were then evaluated using the EASSE score and were submitted with the runid "Sharingans_task3.1_finetuned" and "Sharingans_task3.2_finetuned" for task 3.1 and task 3.2 respectively.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.3.">Fine-Tuned Bart Sequence-to-Sequence Model</head><p>In this approach, we utilized Meta's BART sequence-to-sequence pre-trained model. BART was introduced by Meta (Facebook) as a Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension <ref type="bibr" target="#b11">[12]</ref>. Specifically, we use the "BART-large-cnn" sequence-to-sequence model using the Hugging Face Transformer library. We first tokenized the training input sentences/abstracts and the reference outputs and used them to fine-tune the model. Then we provided the model with test data to generate simplified sentences. We observed that although the model performed well in summarizing the longer sentences and abstracts, it did not simplify them in many cases. Moreover, for shorter sentences, the model generated outputs that were very similar or even the same as the original sentence. Since this model did not perform well as compared to the GPT-3.5 model, we did not include runs for this model.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.4.">Fine-Tuned Pegasus Sequence-to-Sequence Model</head><p>In this approach, we utilized the PEGASUS model for text simplification. PEGASUS is a pre-trained encoder-decoder model tailored specifically for abstractive text simplification <ref type="bibr" target="#b12">[13]</ref>. We fine-tune this model via the Hugging Face Transformer library using the same approach as for BART. This model provides slightly better results than BART but still lags behind OpenAI's GPT-3.5.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Results and Discussion</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Task 01</head><p>Table <ref type="table" target="#tab_2">8</ref> shows the score of the run submitted for task 01. The scores are fairly low for our submitted approach. Specifically, we observe that the model has a very low precision. This suggests a loophole in our MSMarco-GPT-based reranking approach. We hypothesize that this is due to the manual curation of data for fine-tuning the GPT-3.5 model. We also hypothesize that models such as GPT-3.5 might be limited in their ability to extract a relevant passage from the given data. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Task 02</head><p>Our run for Task 02 retrieved a total of 1,501 keywords, assigned them difficulty scores, and later on generated their definitions and explanations. Table <ref type="table" target="#tab_3">9</ref> shows our official results for our Task 02 run. The overall recall metric indicates the proportion of terms (independently from the difficulty) that were found while the precision metric indicates how accurately were the terms labeled as difficult. The ability of GPT-3.5 Turbo to effectively comprehend Natural Language tasks can be concluded from the overall scores of recall and precision indicating that our fine-tuned model was able to extract keywords and distinguish their difficulties quite satisfactorily. The BLEU scores, on the other hand, computed with n-grams equal to 1, 2, 3, and 4 lack precision on a higher number of n-grams. This may potentially be because the words chosen by our fine-tuned model to complete the definitions were not quite in line with the actual definitions used as reference, however, the idea conveyed by the definition was correct to an extent based on manual interpretation.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Task 03</head><p>Tables <ref type="table" target="#tab_5">10 and 11</ref> show the scores for the run submitted for task 3.1 and task 3.2 respectively. Since an identical approach was taken for tasks 3.1 and 3.2 for these runs, they exhibit very similar scores. We observe that the fine-tuned GPT-3.5 model scores fairly high in the scoring metrics. The FKGL, BLEU and Lexical complexity score for task 3.1 and 3.2 are similar. The SARI score and compression ratio are slightly higher in task 3.2 which indicates that documents in task 3.2 had to be modified more than the relatively smaller sentences in task 3.1 for simplification. The FKGL scores for both sub-tasks however indicate that the text can be further simplified. But this should be done without loss of information of the original text. Overall, this suggests that our approach has fairly good potential for scientific text simplification and summarization. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusion</head><p>We utilized several models and techniques to solve SimpleText tasks 1, 2 and 3. For Task 1, we resorted to extracting keywords, sorting through documents, and ranking their relevance, then finally using GPT-3.5 to pick out the most relevant passages for our summary. Task 2 mostly involved fine-tuning the GPT-3.5 Turbo model to generate complex definitions. We also experimented with the KeyBert model to extract words, Random Forest classification to assign complexities and then generating definitions via prompt engineering using the MISTRAL 7-B model. However, the GPT approach turned out to be much better. Since Task 3 was text-generation based, we utilized curated data to finetune the GPT API and generate summaries. We also experimented with the Pegasus and BART model for abstractive summarization, but GPT-3.5 exhibited a better performance. Conclusively, we found that out of all approaches, OpenAI's GPT-3.5 language model gave the best results for task 2 and task 3. However, the pipeline for Task 01 which utilized GPT-3.5 did not perform well. 
Further research can be done to investigate the cause of poor performance of the Marco-GPT pipeline as well as to further improve the approaches for Tasks 2 and 3 for better simplification of scientific texts.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Examples of queries generated for vector database based on the length of query text</figDesc><table><row><cell>Sentence/Phrase</cell><cell cols="2">Corpus Parameter Query</cell></row><row><cell>Digital Assistant</cell><cell>title</cell><cell>https://guacamole.univ-avignon.</cell></row><row><cell></cell><cell></cell><cell>fr/stvir_test?corpus=title&amp;phrase=</cell></row><row><cell></cell><cell></cell><cell>Digitalassistant&amp;length=100</cell></row><row><cell>how AI systems, especially virtual assis-</cell><cell>abstract</cell><cell>https://guacamole.univ-avignon.</cell></row><row><cell>tants, can perpetuate gender stereotypes</cell><cell></cell><cell>fr/stvir_test?corpus=abstract&amp;</cell></row><row><cell></cell><cell></cell><cell>phrase=howAIsystems,</cell></row><row><cell></cell><cell></cell><cell>especiallyvirtualassistants,</cell></row><row><cell></cell><cell></cell><cell>canperpetuategenderstereotypes&amp;</cell></row><row><cell></cell><cell></cell><cell>length=100</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 4</head><label>4</label><figDesc>Experimental setup for GPT-3.5 Turbo for Task 2</figDesc><table><row><cell>Model Name</cell><cell cols="4">Queries Epochs Batch Size learning_rate_multiplier</cell></row><row><cell>GPT-3.5 Turbo</cell><cell>501</cell><cell>3</cell><cell>1</cell><cell>2</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 8</head><label>8</label><figDesc>Run scores for Task 01</figDesc><table><row><cell>runid</cell><cell cols="6">MRR Precision 10 Precision 20 NDCG10 NDCG20 Bpref MAP</cell></row><row><cell>Sharingans_Task1</cell><cell>0.6667</cell><cell>0.0667</cell><cell>0.0333</cell><cell>0.1149</cell><cell>0.0797</cell><cell>0.0107 0.0107</cell></row><row><cell>_marco-GPT3</cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 9</head><label>9</label><figDesc>Run scores for Task 02</figDesc><table><row><cell>runid</cell><cell>recall</cell><cell></cell><cell>precision</cell><cell></cell><cell>BLEU</cell></row><row><cell></cell><cell cols="2">overall average difficult_terms</cell><cell></cell><cell>n1</cell><cell>n2</cell><cell>n3</cell><cell>n4</cell></row><row><cell cols="2">Sharingans 0.472222 0.530246</cell><cell>0.544811</cell><cell>0.595361</cell><cell cols="2">0.225719 0.103904 0.0300 0.0160</cell></row><row><cell>_Task2.2_GPT</cell><cell></cell><cell></cell><cell></cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 10</head><label>10</label><figDesc>Run scores for Task 3.1</figDesc><table><row><cell>runid</cell><cell cols="4">Count FKGL SARI BLEU</cell><cell>Lexical</cell><cell cols="2">Compression Levenshtein</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Complexity</cell><cell>ratio</cell><cell>Similarity</cell></row><row><cell>Sharingans_task3.1</cell><cell>578</cell><cell>11.39</cell><cell>38.61</cell><cell>18.18</cell><cell>8.70</cell><cell>0.83</cell><cell>0.77</cell></row><row><cell>_finetuned</cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_5"><head>Table 11</head><label>11</label><figDesc>Run scores for Task 3.2</figDesc><table><row><cell>runid</cell><cell cols="4">Count FKGL SARI BLEU</cell><cell>Lexical</cell><cell cols="2">Compression Levenshtein</cell></row><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>Complexity</cell><cell>ratio</cell><cell>Similarity</cell></row><row><cell>Sharingans_task3.2</cell><cell>103</cell><cell>11.53</cell><cell>40.96</cell><cell>18.29</cell><cell>8.80</cell><cell>1.2</cell><cell>0.65</cell></row><row><cell>_finetuned</cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell></row></table></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>We would like to acknowledge the support provided by the Office Of Research (OoR) at Habib University, Karachi, Pakistan for funding this project through the internal research grant IRG-2235. We would also like to thank SimpleText@CLEF-2024 chairs for their guidance and organization.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Overview of the CLEF 2024 SimpleText task 1: Retrieve passages to include in a simplified summary</title>
		<author>
			<persName><forename type="first">E</forename><surname>Sanjuan</surname></persName>
		</author>
		<ptr target="CEUR-WS.org" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2024)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Overview of the CLEF 2024 SimpleText task 2: Identify and explain difficult concepts</title>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">M D</forename><surname>Nunzio</surname></persName>
		</author>
		<ptr target="CEUR-WS.org" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2024)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Overview of the CLEF 2024 SimpleText task 3: Simplify scientific text</title>
		<author>
			<persName><forename type="first">L</forename><surname>Ermakova</surname></persName>
		</author>
		<ptr target="CEUR-WS.org" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2024)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Elsevier at simpletext: Passage retrieval by fine-tuning gpl on scientific documents</title>
		<author>
			<persName><forename type="first">A</forename><surname>Capari</surname></persName>
		</author>
		<ptr target="CEUR-WS.org" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2023)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Aiir and liaad labs systems for clef 2023 simpletext</title>
		<author>
			<persName><forename type="first">B</forename><surname>Mansouri</surname></persName>
		</author>
	<ptr target="CEUR-WS.org" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2023)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2023">2023</date>
			<biblScope unit="volume">3497</biblScope>
			<biblScope unit="page" from="253" to="253" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Ubo team @ clef simpletext 2023 track for task 2 and 3 - using ia models to simplify scientific texts</title>
		<author>
			<persName><forename type="first">Q</forename><surname>Dubreuil</surname></persName>
		</author>
		<ptr target="CEUR-WS.org" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2023)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Sinai participation in simpletext task 2 at clef 2023: Gpt-3 in lexical complexity prediction for general audience</title>
		<author>
			<persName><forename type="first">J</forename><surname>Ortiz-Zambrano</surname></persName>
		</author>
		<ptr target="CEUR-WS.org" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of the Conference and Labs of the Evaluation Forum (CLEF 2023)</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<monogr>
		<title level="m" type="main">Automatic Keyword Extraction from Individual Documents</title>
		<author>
			<persName><forename type="first">S</forename><surname>Rose</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Engel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Cramer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Cowley</surname></persName>
		</author>
		<idno type="DOI">10.1002/9780470689646.ch1</idno>
		<imprint>
			<date type="published" when="2010">2010</date>
			<biblScope unit="page" from="1" to="20" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<monogr>
		<title level="m" type="main">Colbert: Efficient and effective passage search via contextualized late interaction over bert</title>
		<author>
			<persName><forename type="first">O</forename><surname>Khattab</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zaharia</surname></persName>
		</author>
		<ptr target="https://arxiv.org/abs/2004.12832" />
		<idno type="arXiv">arXiv:2004.12832</idno>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<monogr>
		<title level="m" type="main">Maartengr/keybert: Bibtex</title>
		<author>
			<persName><forename type="first">M</forename><surname>Grootendorst</surname></persName>
		</author>
		<idno type="DOI">10.5281/zenodo.4461265</idno>
		<ptr target="https://doi.org/10.5281/zenodo.4461265" />
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">EASSE: Easier automatic sentence simplification evaluation</title>
		<author>
			<persName><forename type="first">F</forename><surname>Alva-Manchego</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Martin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Scarton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Specia</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/D19-3009</idno>
		<ptr target="https://aclanthology.org/D19-3009" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations, Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">S</forename><surname>Padó</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Huang</surname></persName>
		</editor>
		<meeting>the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations, Association for Computational Linguistics<address><addrLine>Hong Kong, China</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="49" to="54" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension</title>
		<author>
			<persName><forename type="first">M</forename><surname>Lewis</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Goyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ghazvininejad</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Mohamed</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Levy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Stoyanov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Zettlemoyer</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2020.acl-main.703</idno>
	</analytic>
	<monogr>
		<title level="m">Annual Meeting of the Association for Computational Linguistics</title>
				<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="7871" to="7880" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<monogr>
		<author>
			<persName><forename type="first">J</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Saleh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">J</forename><surname>Liu</surname></persName>
		</author>
		<idno>ArXiv abs/1912.08777</idno>
		<ptr target="https://api.semanticscholar.org/CorpusID:209405420" />
		<title level="m">Pegasus: Pre-training with extracted gap-sentences for abstractive summarization</title>
				<imprint>
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
