<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Assembling four Open Web Search Components</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">T</forename><forename type="middle">U</forename><surname>Dresden</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Linda</forename><surname>Erben</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Maria</forename><surname>Hampel</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Malte-Christian</forename><surname>Kuns</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Vincent</forename><surname>Melisch</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Wilhelm</forename><surname>Pertsch</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Lina</forename><surname>Razouk</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Reiner</forename><surname>Stolle</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Robert</forename><forename type="middle">Thomas</forename><surname>Thoss</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Tuan</forename><surname>Giang</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Julius</forename><surname>Gonsior</surname></persName>
							<affiliation key="aff0">
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
							<affiliation key="aff1">
								<orgName type="department">Database Research Group</orgName>
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden, Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Supervision of projects</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Anja</forename><surname>Reusch</surname></persName>
							<affiliation key="aff1">
								<orgName type="department">Database Research Group</orgName>
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden, Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
							<affiliation key="aff3">
								<orgName type="department">Supervision of projects</orgName>
							</affiliation>
						</author>
						<author>
							<persName><forename type="middle">Genre</forename><surname>Classificaion</surname></persName>
							<affiliation key="aff1">
								<orgName type="department">Database Research Group</orgName>
								<orgName type="institution">Technische Universität Dresden</orgName>
								<address>
									<settlement>Dresden, Dresden</settlement>
									<country key="DE">Germany</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff2">
								<orgName type="department">International Workshop on Open Web Search</orgName>
								<address>
									<addrLine>March 28</addrLine>
									<postCode>2024</postCode>
									<settlement>Glasgow</settlement>
									<country key="GB">Scotland</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Assembling four Open Web Search Components</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">E7F9928488C58CC13D15DB39AFCEEDFB</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T18:17+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Information Retrieval</term>
					<term>Open Web Search</term>
					<term>Genre Classification</term>
					<term>Text Snippet Extraction</term>
					<term>Query Expansion</term>
					<term>Text Features</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>In this work, we present the submission of TU Dresden to WOWS 2024. Four student teams assembled different approaches for Genre Classification, Text Snippet Extraction, Query Expansion, and Text Features. Each implemented component integrates seamlessly into the open web search ecosystem. We present each approach alongside a short evaluation of possible use cases, and hope that our submission will contain viable building blocks for future research to build upon.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>This report describes the submission of the team at TU Dresden for the Workshop on Open Web Search WOWS 2024 <ref type="bibr" target="#b0">[1]</ref>. The work was conducted during a university-organized hackathon targeted at students. Details about the setup are included in the Appendix in Sec. A. Four teams, each consisting of two to three students, contributed four components for the open web search ecosystem. We hope that with our submitted components future research on Information Retrieval (IR) can be facilitated.</p><p>In summary, this paper discusses the following four components: Sec. 2 reports the work of the group Genre Classification, which categorizes web pages based on the intent of the page, such as Discussion or Shopping. In Sec. 3 we detail our submission for the extraction of text snippets. Here, the goal is to divide long documents into shorter ones and return a list of the best snippets. Sec. 4 provides details on the work of the group Query Expansion, which employed Large Language Models (LLMs) to generate more related information or variants for a given query. The results for the extraction of text features are highlighted in Sec. 5. The goal of this component was to quantify syntactic or semantic features of natural language such as the readability of a web page. Finally, Sec. 6 draws the conclusions of all our submissions.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.">Methods</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.1.">Rule-Based Classifier</head><p>The rule-based classifier makes use of a vocabulary list of relevant terms per genre. Comparing the intersection between terms in the genre-specific vocabulary lists, and the terms in the document, the most probable category is the one with the highest intersection. We first remove stop words and subsequently extract the 75 most frequent terms that we compare to the vocabulary lists to classify the genre. We use Snorkel AI <ref type="bibr" target="#b3">[4]</ref> for implementation.</p><p>The rule-based classifier can be adapted to a precision-oriented method, where the most probable genre needs to be better than a threshold compared to the second most probable genre, otherwise the classification result is abstain.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.2.">Multi-Layer Perceptron Classifier</head><p>As a typical Machine Learning based method, a neural network was used for classification. As features, the web pages were converted into a tf-idf vector space. We use the Python library scikit-learn <ref type="bibr" target="#b4">[5]</ref> for the implementation of the Multi-Layer Perceptron classifier. After an empirical hyperparameter search, a neural network using a single hidden layer of 50 neurons, ReLU activation function, stochastic gradient descent in the Adam variant using momentum for optimization, and a constant learning rate of 0.001 was used.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.">Experiments</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.1.">Dataset</head><p>For evaluation we used the Genre-KI-04 dataset <ref type="bibr" target="#b1">[2]</ref>. This includes vocabulary lists, and the following classification categories: articles, discussion, download, help, link lists, portrait (non private), portrait (private), and shop. Details about the genres can be found in the original paper.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Text Snippet Extraction</head><p>Since sophisticated neural ranking models such as cross-encoders generally require a lot of computational effort, a customary retrieval pipeline first retrieves a number of (e.g., 1000) documents using a fast but imprecise retrieval method and then re-ranks those documents using a more precise weighting model <ref type="bibr" target="#b5">[6]</ref>. Cross-encoders, as introduced by Nogueira and Cho <ref type="bibr" target="#b5">[6]</ref>, are an example of the latter and are used to calculate scores for query-document pairs. Apart from their comparatively high computational cost, cross-encoders have another disadvantage: their limited input size. This weakness is typically mitigated by truncating the document once the maximum number of input tokens is reached. The problem with this procedure is that content which is not at the beginning of a document is not taken into account by cross-encoders. As a result, the ranking of documents may be biased towards those that address the query early on.</p><p>In this part, we therefore present a simple method of extracting a number of snippets, i.e., smaller chunks of the document which fit in the cross-encoder, as an additional component in a larger retrieval pipeline. Instead of simply truncating documents after a fixed number of tokens, we search for the most relevant passages (ranked snippets) in the document. These ranked snippets are used for the cross-encoder with the goal of a more precise ranking. We show the benefits of this method on two exemplary datasets which contain long documents.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Methodology</head><p>The re-ranking process with ranked snippets consists of five steps. An example of those steps for the re-ranking of $n = 3$ documents ($d_1$, $d_2$, and $d_3$) is shown alongside the explanation.</p><p>First, we subdivide all $n$ documents into snippets. The maximum length of those snippets may be chosen arbitrarily—we defaulted to 250 tokens which is the passage size used by Dalton et al. <ref type="bibr" target="#b6">[7]</ref>. The actual length of the snippets may vary since the division process aims to retain context by not separating sentences. For example, we may start with three documents $d_1$, $d_2$, $d_3$. After the first step, each of these documents is divided into several snippets:</p><formula xml:id="formula_0">s_1^1\, s_1^2 \ldots s_1^{l_1}, \quad s_2^1\, s_2^2 \ldots s_2^{l_2}, \quad s_3^1\, s_3^2 \ldots s_3^{l_3}</formula><p>where $s_i^j$ denotes the $j$-th snippet of document $d_i$ for $j \in \{1, \ldots, l_i\}$ and $l_i$ is the number of snippets of $d_i$.</p><p>In Step 2, we pre-rank all extracted snippets in relation to the query. To accomplish this, we view the set of all snippets of a document as a corpus. From this corpus, we can create a ranking for the query using one of the following weighting models: Term frequency (TF), BM25 or PL2. We do not use a cross-encoder for the pre-ranking of documents, because there may be a multitude of snippets per document depending on document length and therefore ranking all snippets using a cross-encoder can drastically slow down the re-ranking process. After this pre-ranking step, our example snippets might be ranked in the following way:</p><formula xml:id="formula_1">s_3^3 &gt; s_2^2 &gt; s_2^4 &gt; s_3^2 &gt; s_3^1 &gt; s_1^3 &gt; s_3^4 &gt; s_1^1 &gt; s_2^3 &gt; s_1^5 &gt; \ldots</formula><p>In Step 3, we can obtain the top $k$ relevant snippets of each document from the pre-ranking, which are later ranked using a cross-encoder. 
This step ensures that the cross-encoder only needs to rank $n \cdot k$ snippets for $n$ documents instead of all snippets. In order to reduce computational cost, we defaulted to $k = 3$. In our example, this step results in the following selection:</p><formula xml:id="formula_2">s_1^3, s_1^1, s_1^5, \quad s_2^2, s_2^4, s_2^3, \quad s_3^3, s_3^2, s_3^1</formula><p>Here, $s_3^4$ is not selected as one of the top snippets of $d_3$ since it is the $(k + 1)$-th snippet of $d_3$ in the ranking despite being ranked relatively high.</p><p>In Step 4, the top $k$ snippets of all documents are ranked using a cross-encoder (CE). That way, similar to Step 2, we can more accurately deduce which snippets best match the query—but now the ranking is more precise since we used a CE instead of the simple weighting models used in Step 2. An exemplary ranking for our snippets might be: $s_2^4 &gt; s_3^3 &gt; s_2^2 &gt; s_1^5 &gt; \ldots$. The final document ranking ensues from this snippet ranking in Step 5, i.e., the document that provided the best snippet is ranked first. Our example documents are therefore ranked in the following way:</p><formula xml:id="formula_3">d_2 &gt; d_3 &gt; d_1</formula><p>It should be noted that the goal of this section is to rank documents with regard to a query, and not only passages. Therefore, the result is a ranking of documents. Details on our implementation can be found in Appendix B.1.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Evaluation</head><p>In this section, we conduct tests to study the possible improvements of our cross-encoder re-ranking of top k snippets. As baselines, we use BM25 and the dense retriever MonoT5. All further ranking is performed on the top 20 documents retrieved by these two systems. We evaluate the re-ranking with our TF-ranked snippets. For this, we load the previously saved top 3 snippets for each document. To re-rank the documents, we follow the "weakest link" principle, selecting the minimum TF score among the top 3 snippets. This results in the methods BM25+TF-SP and MonoT5+TF-SP. We denote by +CE that the 3 snippets are further re-ranked by a cross-encoder. In addition, we compare the performance of these systems to the cross-encoder's performance when only evaluating the first snippet of each document (which resembles the naïve application of a cross-encoder). These results are denoted by BM25+CE and MonoT5+CE.</p><p>To measure the performance of the approaches, we utilize normalized discounted cumulative gain at 10 (NDCG@10) and mean reciprocal rank (MRR). We conduct our tests on the ClueWeb12 <ref type="bibr" target="#b7">[8]</ref> and ClueWeb09 <ref type="bibr" target="#b8">[9]</ref> datasets, which differ in document size: ClueWeb12 has an average document size of 5641.7 tokens, and ClueWeb09 has an average document size of 1132.6 tokens. The results for the two datasets are plotted in Fig. <ref type="figure" target="#fig_0">2</ref>. Our approach of cross-encoder re-ranking with TF-pre-ranked snippets achieves the best performance in both metrics across all our tested datasets (see Appendix B.2 for diagrams of other evaluated datasets). The impact of our TF-ranked snippet pre-selection is relatively high on ClueWeb12 with long documents, while it is more marginal on ClueWeb09. This highlights the importance of snippet pre-selection for longer documents. 
ClueWeb09 consists of approximately 6 snippets, and ClueWeb12 consists of approximately 23 snippets per document. We assume for our naïve snippet generation approach that information is equally spread throughout a document. A cross-encoder taking the first snippet as input is assumed to capture more relevant information of a document with a size that is closer to the cross-encoder input size. This also explains why MonoT5 scores better on the shorter dataset, especially in comparison to BM25, since MonoT5 also suffers from a limited input size. This proves that there is a need to address the problem of limited input size, especially in large documents like those in ClueWeb12. That information is not always equally spread over a document, like we assumed for our snippet generation, can be concluded when comparing Figs. <ref type="figure" target="#fig_1">4b and 4c</ref>. This raises the need of a more advanced approach for snippet generation.</p><formula xml:id="formula_4">0 0.2 0.4 0.6 BM25 MonoT5 BM25+TF-SP MonoT5+TF-SP BM25+TF-SP+CE MonoT5+TF-SP+CE BM25+CE MonoT5+CE Performance (a) ClueWeb09 (2011) 0 0.2 0.4 0.6 BM25 MonoT5 BM25+TF-SP MonoT5+TF-SP BM25+TF-SP+CE MonoT5+TF-SP+CE BM25+CE MonoT5+CE Performance (b) ClueWeb12 (2013)</formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.">Summary</head><p>Overall, our results show that selecting top-k pre-ranked snippets is a viable approach to tackle the problem of input size restrictions on Transformer-based retrieval systems. In particular, cross-encoders can benefit from this approach since they are inefficient on large documents. Further testing to edge out efficiency and reduce context loss with snippets will be required. Also, it would be beneficial to test multiple pre-ranking systems and values of k for top-k snippet selection. The code for this part can be found in the accompanying repository<ref type="foot" target="#foot_1">2</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Query Expansion and User Query Variants using Large Language Models</head><p>Query Expansion and User Query Variants are two common methods to increase the recall of an IR system <ref type="bibr" target="#b9">[10,</ref><ref type="bibr" target="#b10">11,</ref><ref type="bibr" target="#b11">12]</ref>. Both methods are based on modifying the query to include more related keywords, thereby causing the IR system to score relevant documents higher. In addition to conventional techniques such as the Kullback-Leibler Divergence (KL) <ref type="bibr" target="#b12">[13,</ref><ref type="bibr" target="#b13">14]</ref> or Relevance Model 3 (RM3) <ref type="bibr" target="#b14">[15]</ref>, recent approaches have embraced the utilization of Large Language Models (LLMs). In this part, we employ various prompts to generate improved and expanded queries using LLMs <ref type="bibr" target="#b15">[16,</ref><ref type="bibr" target="#b16">17]</ref>, in particular, GPT-3.5<ref type="foot" target="#foot_2">3</ref> , Llama2 <ref type="bibr" target="#b17">[18]</ref> and FLAN-UL2 <ref type="bibr" target="#b19">[19]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Methodology</head><p>LLMs have previously been in use for the task of query expansion and studies have been conducted using various methods and language models <ref type="bibr" target="#b15">[16,</ref><ref type="bibr" target="#b20">20,</ref><ref type="bibr" target="#b21">21,</ref><ref type="bibr" target="#b22">22]</ref>. Wang et al. <ref type="bibr" target="#b21">[21]</ref> employ query2doc, a method where the LLM generates a document for a given query, which is then used as Pseudo-Relevance Feedback (PRF). (Caption of Table 1: Model parameters; for GPT-3.5 the parameter min. tokens is unavailable in the API.) Previous work demonstrates improvements across different datasets. In order to weigh the original query more heavily, multiple concatenations of the original query q with a single instance of the LLM's output may be used <ref type="bibr" target="#b15">[16,</ref><ref type="bibr" target="#b21">21]</ref>. The resulting expanded query is of the form $q' = \mathrm{concat}(\{q\} * n, \mathrm{LLM}_{\mathrm{out}})$, where $n$ is the number of times $q$ is concatenated with itself, and $\mathrm{LLM}_{\mathrm{out}}$ is the LLM-generated version of $q$. We adopt this approach in our work with $n = 5$ and employ modified versions of the prompt types suggested by Jagerman et al. <ref type="bibr" target="#b15">[16]</ref>. While GPT-3.5 and FLAN were prompted to generate five similar queries, Llama was asked to answer the query. Apart from this difference, the prompts in all experiments are similar and comparable.</p><p>Initially, the query, along with the prompt, is fed into the LLM, and its response is concatenated with the original query (n = 5). For evaluation, the Recall@1000 metric of the original and modified queries is compared on the given dataset using BM25. The specific LLMs in use are GPT-3.5 45 , Llama 2 <ref type="bibr" target="#b17">[18]</ref> and FLAN-UL2 <ref type="bibr" target="#b19">[19]</ref>. Llama 2 and FLAN-UL2 were run locally. 
Table <ref type="table" target="#tab_0">1</ref> shows the model configurations that we used in our experiments. The temperature values were chosen empirically in a way such that model outputs are roughly similar. The lower and upper token limitations prevent generation edge cases such as empty responses or endless output, while still allowing for expressive responses. Local models had to fit GPU memory constraints. Hence, we had to employ the quantized versions of the models. We conducted experiments for the prompt types presented above: CoT, Q2E/FS, and Q2E/ZS. While FLAN-UL2 and GPT-3.5 can be prompted without further changes, Llama 2 requires the chat-prompt to follow a pre-defined format, our version of which can be found in the project's repository 6 . We utilize BM25 as the retrieval system in the default configuration of the Tira-framework <ref type="bibr" target="#b23">[23]</ref>. The query expansion baselines consist of an unmodified BM25, BM25 with Kullback-Leibler Divergence (KL) and BM25 with RM3. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Evaluation</head><p>We measure the recall, which is aggregated over 18 datasets per model, and per prompt type. The datasets cover a range of diverse topics and were provided as part of TIRA <ref type="bibr" target="#b24">[24]</ref> / TIREx <ref type="bibr" target="#b25">[25]</ref>. The aggregated results can be observed in Table <ref type="table">4</ref>.2. Avg easy excludes evidently (cord19, longeval) and presumably (medline) difficult datasets. This highlights the difficulties LLMs experience on specific datasets, especially domain-specific ones: Excluding those, CoT+OpenAI GPT-3.5 Turbo (GPT) now performs 0.03 points better than baseline models. Note that the two Avg rows cannot be compared to one another, as baseline scores have also shifted due to the exclusion of generally low-performing datasets. Detailed results for each dataset can be found in Appendix C.1. For our query expansion approaches, it is evident that the choice of prompt has a large impact on recall performance. The combination of CoT and GPT consistently yields the highest recall in absolute numbers. However, with other prompt types such as Q2E/ZS and Q2E/FS, GPT also frequently achieves the highest recall per dataset, albeit less frequently compared to CoT. In this regard, our results are consistent with those reported in <ref type="bibr" target="#b15">[16]</ref>. Although CoT generally performs the best, it exhibits poorer results than the baselines in datasets such as cord19 or the longeval datasets. In these cases, Q2E/ZS and Q2E/FS emerge as better choices, but are still commonly outperformed by the baseline models. Q2E/FS exhibits less convincing effectiveness, presumably because it mimics the relatively short responses of example queries through the Q2E/FS method, resulting in short queries with few new keywords. Q2E/ZS behaves similarly. 
Although the responses of the LLMs are longer compared to Q2E/FS, as the LLMs do not conform to the rather short examples, the generated responses are overall less extensive than those of CoT, likely resulting in inferior effectiveness. Considering the longeval datasets and cord19, it is evident that they contain either very general or highly specific queries. In the case of nonspecific queries, there is a risk that they may be muddled by the consequently more general, and in the case of CoT, extensive responses from the LLMs. This effect might potentially be reversed by conveying the user intent to the LLM, indicating whether, for instance, in the case of the query "car," the user intends to buy one or have it repaired. With domain-specific queries, it is plausible that models were trained with insufficient knowledge on the subject, resulting in subpar effectiveness.</p><p>While our main evaluation is conducted using recall@1000, we also evaluated nDCG@10. RIX <ref type="bibr" target="#b34">[34]</ref> Long Word Count Sentence Count</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 3</head><p>Implemented Text Features with the respective formulas. Syllable count and word count were implemented using the provided tools by the Text Feature Libraries Textstat and spaCy.</p><p>The results for this metric are detailed in Table <ref type="table">4</ref> in the appendix. Overall, our conclusions for nDCG are similar to those for recall. The generations for each model and each prompt are publicly available in our repository <ref type="foot" target="#foot_6">7</ref> .</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Summary</head><p>In this part, we generated different versions of query expansions using three LLMs and three prompt templates. We were able to demonstrate that LLMs are capable of improving the recall of user queries. The combination of the prompt CoT alongside GPT proves to be the most promising, improving recall scores by up to 15%. Future research could focus on further templates for using the generated expansions since we only evaluated the qqqqq, response-format.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Text Features</head><p>Text Features are quantified metrics describing syntactic or semantic features of natural language. An example is the readability of a text, useful for returning user-dependent search results. A search engine targeted to school children should return results with a high readability score, whereas a search engine with domain experts as target audience will also include texts with low readability scores. Additionally, this could be used to filter out noisy websites. This Open Web Search component<ref type="foot" target="#foot_7">8</ref> incorporates two tools for computing text features, namely Textstat <ref type="bibr" target="#b26">[26]</ref> and textdescriptives <ref type="bibr" target="#b27">[27]</ref> from spaCy <ref type="bibr" target="#b28">[28]</ref>. SpaCy's Text Feature analysis is more comprehensive than the one in Textstat, but is less efficient. Per design of the pipeline approach of SpaCy many things are computed in the background, from which only a few are required for the calculation of the text features. This overhead results in a longer runtime which should be considered. Table <ref type="table">3</ref> displays the implemented text features.</p><p>Additional contributions besides the integration of the text features components include examining a potential correlation between text features and documents evaluated as relevant by ranked retrieval models. For easier exploration of the document corpus we provide an interactive Jupyter Notebook showing correlation graphs between Ranked Retrieval and Text Features, applicable to arbitrary datasets, as well as the analysis of correlations between Ranked Retrieval and Text Measures.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.1.">Evaluation</head><p>To verify the capability to differentiate between levels of Readability, unit tests were used. The test data consists of multi-sentence snippets from web pages. These were categorized by difficulty in the following categories (including the number of test documents): children (3), teenagers (3), academic (3), and simple language (2), depending on what demographic the source was directed to. Initial tests involved a project member assessing the reading level of excerpts and comparing their assessments to the classifications provided by the automated measures, thus proving correct usage of the used text feature libraries at least for the readability scores.</p><p>Compared to human assessments, the automated Text Measures often overestimated the reading level, possibly failing to capture the complexities of human reading abilities within their respective indexes. Large-scale dataset computations further highlighted the discrepancies between predicted and human-classified reading levels, corroborating these findings. Despite the observed differences in assessment, the data suggested an inversely proportional relationship between comprehension levels and readability measures.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.2.">Experiment Design</head><p>The experiments were run on the "antique/test" <ref type="bibr" target="#b35">[35]</ref> dataset from the ir_datasets collection <ref type="bibr" target="#b36">[36]</ref>. Based on TIRA <ref type="bibr" target="#b37">[37]</ref> ranked retrieval models were used to create top-10 results.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.2.1.">Correlations between ranked retrieval and text feature readability</head><p>A primary objective of our project was to investigate whether the ranking of relevant documents by ranked retrieval models correlates with document Readability.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.2.2.">Readability of Top 10</head><p>First, we looked at the top 10 retrieved documents for all queries across multiple retrieval models; the resulting distributions are displayed in Figure <ref type="figure">3</ref>. The majority of results, assessed using the Flesch Reading Ease, indicates comprehension levels at or below an eighth-grade level, implying a high degree of readability. The high degree of readability was consistently observed across multiple retrieval models. Compared to the overall readability across all documents in a collection, we found that some retrieval models like SBERT or MonoT5 indeed result in a higher readability in the retrieved documents than in the rest of the corpus, suggesting a potential relationship between relevancy and readability, whereas other retrieval models such as BM25 do not share this characteristic.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>A. Hackathon</head><p>The paper's work was carried out by students from TU Dresden as part of a one-week hackathon.</p><p>The workshop was open to students in the Computer Science program and related fields, and they could earn ECTS credit points for lab work. The hackathon was advertised on the mailing lists of the beginner Information Retrieval courses from the past three years. Interested students could fill out a survey indicating their preferred timeframe for the hackathon.</p><p>After a date was decided, 10 students signed up for the hackathon, three from the Bachelor's program and seven from the Master's program. The university supervisors prepared four topics, which were advertised beforehand, and the students signed up for their preferred topics. The text features topic was assigned to the three Bachelor students. The Master students were provided with a peer-reviewed research paper as additional material, which they were required to read and understand before the hackathon.</p><p>On the first day of the hackathon, an invited member of the Open Web Search project provided a brief introduction to the Open Web Search ecosystem and TIRA/TIREx. Following this, the teams worked on their components, with supervisors providing guidance through daily check-ins. On the fifth and final day of the hackathon, a short presentation from each team was held. Following the hackathon, the students were requested to prepare a report on their work, which served as the basis for this paper.</p><p>In retrospect, the short amount of time, one week, motivated the students to work diligently on their project. However, at the end of the week, the students had several open ideas for future work which they could not finish in time. Therefore, more time, even a few days more, might be beneficial for the next iteration of the hackathon. The group size ranged between two and three members. 
The small group size facilitated the organization within each group and kept the management overhead small. The topics of the hackathon were aligned with the basics gained during the Information Retrieval course, but also required reading additional literature and research.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B. Text Snippet Extraction</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B.1. Implementation</head><p>To implement the described re-ranking steps, we utilized several Python libraries, detailed below to facilitate reproducibility. For snippet extraction in Step 1, we adapted the SpacyPassageChunker class from the corpus_processing package, as provided by Dalton et al. <ref type="bibr" target="#b6">[7]</ref>, to allow for variable snippet sizes. The class requires spaCy <ref type="bibr" target="#b38">[38]</ref>; we used version 3.3.0 for our implementation. The snippet pre-ranking in Step 2 was implemented using PyTerrier <ref type="bibr" target="#b39">[39]</ref>, version 0.10.0. For Step 4, we utilized ms-marco-MiniLM-L-6-v2, which has been published on HuggingFace.co <ref type="bibr" target="#b40">[40]</ref>. To embed the model into our project, we used the transformers library <ref type="bibr" target="#b40">[40]</ref>, version 4.38.2, and the PyTorch library <ref type="bibr" target="#b41">[41]</ref>, version 2.2.0. The results of the preparation steps are accessible via TIRA <ref type="bibr" target="#b24">[24]</ref> / TIREx <ref type="bibr" target="#b25">[25]</ref>. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>B.2. Results on other evaluated datasets</head></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Experimental results on different datasets, blue bars denote NDCG@10, while red bars indicate MRR.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 4 :</head><label>4</label><figDesc>Figure 4: Experimental results on other datasets</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Jagerman et al. [16]  follow a similar approach but extend the experiments to include alternative LLMs and additional prompt types. All</figDesc><table><row><cell>Model</cell><cell>Temperature</cell><cell>min. Tokens</cell><cell>max. Tokens</cell><cell>Quantization</cell><cell>Parameters</cell></row><row><cell>Llama 2</cell><cell>1.1</cell><cell>10</cell><cell>200</cell><cell>4 bit</cell><cell>7B</cell></row><row><cell>FLAN-UL2</cell><cell>0.5</cell><cell>10</cell><cell>200</cell><cell>8 bit</cell><cell>20B</cell></row><row><cell>GPT-3.5 Turbo</cell><cell>0.5</cell><cell>-</cell><cell>200</cell><cell>-</cell><cell>175B</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head></head><label></label><figDesc>: Chain of Thoughts (CoT) where the model is prompted to document its thought process, Query to Expansion with Zero-Shot prompting (Q2E/ZS) where the model should reformulate the query directly, and Query to Expansion with Few-Shot prompting (Q2E/FS), where three examples for the desired query format are provided to the model. For the exact prompt format used, see Appendix C.3. It should be noted that the prompt for Q2E/ZS differs between the models.</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 2</head><label>2</label><figDesc>Recall@1000 evaluation results. The best value across different configurations is bolded. Grey values failed to outperform the best baseline effectiveness. Avg denotes the arithmetic mean of the scores across all 18 datasets. Avg easy excludes the cord19, longeval and medline datasets.</figDesc><table><row><cell></cell><cell></cell><cell>Baseline</cell><cell></cell><cell>CoT</cell><cell></cell><cell></cell><cell>Q2E/FS</cell><cell></cell><cell></cell><cell>Q2E/ZS</cell><cell></cell></row><row><cell></cell><cell cols="11">BM25 KL RM3 FLAN Llama GPT FLAN Llama GPT FLAN Llama GPT</cell></row><row><cell>Avg</cell><cell>0.66</cell><cell>0.67 0.69</cell><cell>0.68</cell><cell>0.67</cell><cell>0.69</cell><cell>0.66</cell><cell>0.67</cell><cell>0.67</cell><cell>0.67</cell><cell>0.67</cell><cell>0.68</cell></row><row><cell>Avg easy</cell><cell>0.72</cell><cell>0.73 0.75</cell><cell>0.76</cell><cell>0.76</cell><cell>0.78</cell><cell>0.73</cell><cell>0.74</cell><cell>0.73</cell><cell>0.74</cell><cell>0.76</cell><cell>0.76</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">https://github.com/tira-io/workshop-on-open-web-search-tu-dresden-01</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">https://github.com/tira-io/workshop-on-open-web-search-tu-dresden-02</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">https://platform.openai.com/docs/models/gpt-3-5-turbo</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">https://platform.openai.com/docs/models/gpt-3-5-turbo</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">https://platform.openai.com/docs/api-reference/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_5">https://github.com/tira-io/workshop-on-open-web-search-tu-dresden-03</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="7" xml:id="foot_6">https://github.com/tira-io/workshop-on-open-web-search-tu-dresden-03/tree/main/src/generated</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_7">https://github.com/tira-io/workshop-on-open-web-search-tu-dresden-04</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="7.">Acknowledgments</head><p>We would like to express our gratitude to the Open Search Foundation for organizing the WOWS 2024 and especially Maik Fröbe, who supported us and our student teams in organizing and conducting our Hackathon which made this submission possible.</p><p>In addition, the authors gratefully acknowledge the computing time made available to them on the high-performance computer at the NHR Center of TU Dresden. This center is jointly supported by the Federal Ministry of Education and Research and the state governments participating in the NHR (www.nhr-verein.de/unsere-partner).</p></div>
			</div>

			<div type="annex">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>C. Query Expansion</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 7</head><p>Prompt formats used for Meta Llama 2 7B Chat (Llama). Note the necessity for a system prompt and the additional formatting sequences due to the instruction fine-tuning of Llama-Chat. Prompts were modified to fit Llama's behaviour.</p></div>			</div>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">1st International Workshop on Open Web Search (WOWS)</title>
		<author>
			<persName><forename type="first">S</forename><surname>Farzana</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Granitzer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Hendriksen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Hiemstra</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Zerhoudi</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Advances in Information Retrieval. 46th European Conference on IR Research (ECIR 2024)</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Genre classification of web pages</title>
		<author>
			<persName><forename type="first">S</forename><surname>Meyer Zu Eissen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Advances in Artificial Intelligence</title>
				<editor>
			<persName><forename type="first">S</forename><surname>Biundo</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">T</forename><surname>Frühwirth</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Palm</surname></persName>
		</editor>
		<meeting><address><addrLine>Berlin Heidelberg; Berlin, Heidelberg</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2004">2004</date>
			<biblScope unit="page" from="256" to="269" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">A taxonomy of web search</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">Z</forename><surname>Broder</surname></persName>
		</author>
		<idno type="DOI">10.1145/792550.792552</idno>
		<idno>doi:10.1145/792550.792552</idno>
		<ptr target="https://doi.org/10.1145/792550.792552" />
	</analytic>
	<monogr>
		<title level="j">SIGIR Forum</title>
		<imprint>
			<biblScope unit="volume">36</biblScope>
			<biblScope unit="page" from="3" to="10" />
			<date type="published" when="2002">2002</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Snorkel: Rapid training data creation with weak supervision</title>
		<author>
			<persName><forename type="first">A</forename><surname>Ratner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">H</forename><surname>Bach</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Ehrenberg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Fries</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Ré</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the VLDB endowment. International conference on very large data bases</title>
				<meeting>the VLDB endowment. International conference on very large data bases</meeting>
		<imprint>
			<publisher>NIH Public Access</publisher>
			<date type="published" when="2017">2017</date>
			<biblScope unit="volume">11</biblScope>
			<biblScope unit="page">269</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Scikit-learn: Machine learning in Python</title>
		<author>
			<persName><forename type="first">F</forename><surname>Pedregosa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Varoquaux</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gramfort</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Michel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Thirion</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Grisel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Blondel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Prettenhofer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Weiss</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Dubourg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Vanderplas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Passos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Cournapeau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Brucher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Perrot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Duchesnay</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Machine Learning Research</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page" from="2825" to="2830" />
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<monogr>
		<title level="m" type="main">Passage Re-ranking with BERT</title>
		<author>
			<persName><forename type="first">R</forename><surname>Nogueira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Cho</surname></persName>
		</author>
		<idno type="DOI">10.48550/arXiv.1901.04085</idno>
		<ptr target="http://arxiv.org/abs/1901.04085" />
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<monogr>
		<author>
			<persName><forename type="first">J</forename><surname>Dalton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Xiong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Callan</surname></persName>
		</author>
		<idno type="DOI">10.48550/arXiv.2003.13624</idno>
		<ptr target="http://arxiv.org/abs/2003.13624" />
		<title level="m">TREC CAsT 2019: The Conversational Assistance Track Overview</title>
				<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">The lemur project and its clueweb12 dataset</title>
		<author>
			<persName><forename type="first">J</forename><surname>Callan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Invited talk at the SIGIR 2012 Workshop on Open-Source Information Retrieval</title>
				<imprint>
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<monogr>
		<author>
			<persName><forename type="first">J</forename><surname>Callan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hoy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Yoo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Zhao</surname></persName>
		</author>
		<title level="m">Clueweb09 data set</title>
				<imprint>
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Relevance feedback in information retrieval</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">J</forename><surname>Rocchio</surname><genName>Jr</genName></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">The SMART retrieval system: experiments in automatic document processing</title>
				<imprint>
			<date type="published" when="1971">1971</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">To see, or not to see-is that the query?</title>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">R</forename><surname>Korfhage</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 14th annual international ACM SIGIR conference on Research and development in information retrieval</title>
				<meeting>the 14th annual international ACM SIGIR conference on Research and development in information retrieval</meeting>
		<imprint>
			<date type="published" when="1991">1991</date>
			<biblScope unit="page" from="134" to="141" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Query improvement in information retrieval using genetic algorithms-a report on the experiments of the trec project</title>
		<author>
			<persName><forename type="first">J</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">R</forename><surname>Korfhage</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Rasmussen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Text REtrieval Conference (TREC-1)</title>
				<meeting>the Text REtrieval Conference (TREC-1)</meeting>
		<imprint>
			<date type="published" when="1993">1993</date>
			<biblScope unit="page" from="31" to="58" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">On Information and Sufficiency</title>
		<author>
			<persName><forename type="first">S</forename><surname>Kullback</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">A</forename><surname>Leibler</surname></persName>
		</author>
		<idno type="DOI">10.1214/aoms/1177729694</idno>
		<ptr target="https://doi.org/10.1214/aoms/1177729694" />
	</analytic>
	<monogr>
		<title level="j">The Annals of Mathematical Statistics</title>
		<imprint>
			<biblScope unit="volume">22</biblScope>
			<biblScope unit="page" from="79" to="86" />
			<date type="published" when="1951">1951</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Kullback-leibler divergence revisited</title>
		<author>
			<persName><forename type="first">F</forename><surname>Raiber</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Kurland</surname></persName>
		</author>
		<idno type="DOI">10.1145/3121050.3121062</idno>
		<idno>doi:10.1145/3121050.3121062</idno>
		<ptr target="https://doi.org/10.1145/3121050.3121062" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the ACM SIGIR International Conference on Theory of Information Retrieval, ICTIR &apos;17</title>
				<meeting>the ACM SIGIR International Conference on Theory of Information Retrieval, ICTIR &apos;17<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2017">2017</date>
			<biblScope unit="page" from="117" to="124" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Relevance-based language models</title>
		<author>
			<persName><forename type="first">V</forename><surname>Lavrenko</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">B</forename><surname>Croft</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">ACM SIGIR Forum</title>
				<meeting><address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>ACM</publisher>
			<date type="published" when="2017">2017</date>
			<biblScope unit="volume">51</biblScope>
			<biblScope unit="page" from="260" to="267" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<monogr>
		<author>
			<persName><forename type="first">R</forename><surname>Jagerman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Zhuang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Qin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Bendersky</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2305.03653</idno>
		<title level="m">Query expansion by prompting large language models</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Can generative llms create query variants for test collections? an exploratory study</title>
		<author>
			<persName><forename type="first">M</forename><surname>Alaofi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Gallagher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Sanderson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Scholer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Thomas</surname></persName>
		</author>
		<idno type="DOI">10.1145/3539618.3591960</idno>
		<idno>doi:10.1145/3539618.3591960</idno>
		<ptr target="https://doi.org/10.1145/3539618.3591960" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR &apos;23</title>
				<meeting>the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR &apos;23<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="1869" to="1873" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<monogr>
		<title/>
		<author>
			<persName><forename type="first">H</forename><surname>Touvron</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Martin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Stone</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Albert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Almahairi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Babaei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Bashlykov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Batra</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Bhargava</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Bhosale</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Bikel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Blecher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">C</forename><surname>Ferrer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Cucurull</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Esiobu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Fernandes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Fu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Fu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Fuller</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Gao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Goswami</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Goyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Hartshorn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Hosseini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Hou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Inan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Kardas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Kerkez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Khabsa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Kloumann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Korenev</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">S</forename><surname>Koura</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-A</forename><surname>Lachaux</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Lavril</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Liskovich</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Lu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Mao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Martinet</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Mihaylov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Mishra</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Molybog</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Nie</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Poulton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Reizenstein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Rungta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Saladi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Schelten</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Silva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">M</forename><surname>Smith</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Subramanian</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><forename type="middle">E</forename><surname>Tan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Tang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Taylor</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Williams</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">X</forename></persName>
		</author>
		<imprint/>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<monogr>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">X</forename><surname>Kuan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Xu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Yan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Zarov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Fan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Kambadur</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Narang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Rodriguez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Stojnic</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Edunov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Scialom</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2307.09288</idno>
		<title level="m">Llama 2: Open foundation and fine-tuned chat models</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">Ul2: Unifying language learning paradigms</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Tay</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Dehghani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><forename type="middle">Q</forename><surname>Tran</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Garcia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Wei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><forename type="middle">W</forename><surname>Chung</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Bahri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Schuster</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Zheng</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">The Eleventh International Conference on Learning Representations</title>
				<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Neural text generation for query expansion in information retrieval</title>
		<author>
			<persName><forename type="first">V</forename><surname>Claveau</surname></persName>
		</author>
		<idno type="DOI">10.1145/3486622.3493957</idno>
		<ptr target="https://hal.science/hal-03494692" />
	</analytic>
	<monogr>
		<title level="m">WI-IAT 2021 - 20th IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology, Proceedings of the WI-IAT Conference</title>
				<meeting><address><addrLine>Melbourne, Australia</addrLine></address></meeting>
		<imprint>
			<publisher>IEEE</publisher>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="1" to="8" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Query2doc: Query expansion with large language models</title>
		<author>
			<persName><forename type="first">L</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Wei</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
				<meeting>the 2023 Conference on Empirical Methods in Natural Language Processing</meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="9414" to="9423" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Rocketqav2: A joint training method for dense passage retrieval and passage re-ranking</title>
		<author>
			<persName><forename type="first">R</forename><surname>Ren</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Qu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">X</forename><surname>Zhao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Q</forename><surname>She</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J.-R</forename><surname>Wen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
				<meeting>the 2021 Conference on Empirical Methods in Natural Language Processing</meeting>
		<imprint>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="2825" to="2835" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">The information retrieval experiment platform</title>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">H</forename><surname>Reimer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Macavaney</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Deckers</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Reich</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bevendorff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<idno type="DOI">10.1145/3539618.3591888</idno>
		<idno>doi:10.1145/3539618.3591888</idno>
		<ptr target="http://dx.doi.org/10.1145/3539618.3591888" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR &apos;23</title>
				<meeting>the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR &apos;23</meeting>
		<imprint>
			<publisher>ACM</publisher>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">Continuous Integration for Reproducible Shared Tasks with TIRA</title>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Wiegmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Kolyada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Grahm</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Elstner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Loebe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<idno type="DOI">10.1007/978-3-031-28241-6_20</idno>
		<idno>doi:10.1007/978-3-031-28241-6_20</idno>
		<ptr target="https://doi.org/10.1007/978-3-031-28241-6_20" />
	</analytic>
	<monogr>
		<title level="m">Advances in Information Retrieval. 45th European Conference on IR Research (ECIR 2023)</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<editor>
			<persName><forename type="first">J</forename><surname>Kamps</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">L</forename><surname>Goeuriot</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Maistro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Joho</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Davis</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">C</forename><surname>Gurrin</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">U</forename><surname>Kruschwitz</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Caputo</surname></persName>
		</editor>
		<meeting><address><addrLine>Berlin Heidelberg New York</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="236" to="241" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">The Information Retrieval Experiment Platform</title>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">H</forename><surname>Reimer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Macavaney</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Deckers</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Reich</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bevendorff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<idno type="DOI">10.1145/3539618.3591888</idno>
		<idno>doi:10.1145/3539618.3591888</idno>
		<ptr target="https://doi.org/10.1145/3539618.3591888" />
	</analytic>
	<monogr>
		<title level="m">46th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2023)</title>
				<editor>
			<persName><forename type="first">H.-H</forename><surname>Chen</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">W.-J</forename><forename type="middle">E</forename><surname>Duh</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H.-H</forename><surname>Huang</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><forename type="middle">P</forename><surname>Kato</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Mothe</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Poblete</surname></persName>
		</editor>
		<imprint>
			<publisher>ACM</publisher>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="2826" to="2836" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<monogr>
		<author>
			<persName><forename type="first">S</forename><surname>Bansal</surname></persName>
		</author>
		<ptr target="https://github.com/textstat/" />
		<title level="m">textstat</title>
				<imprint>
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b27">
	<analytic>
		<title level="a" type="main">TextDescriptives: A Python package for calculating a large variety of metrics from text</title>
		<author>
			<persName><forename type="first">L</forename><surname>Hansen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">R</forename><surname>Olsen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Enevoldsen</surname></persName>
		</author>
		<idno type="DOI">10.21105/joss.05153</idno>
		<ptr target="https://joss.theoj.org/papers/10.21105/joss.05153" />
	</analytic>
	<monogr>
		<title level="j">Journal of Open Source Software</title>
		<imprint>
			<biblScope unit="volume">8</biblScope>
			<biblScope unit="page">5153</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b28">
	<monogr>
		<title level="m" type="main">spaCy: Industrial-strength Natural Language Processing in Python</title>
		<author>
			<persName><forename type="first">M</forename><surname>Honnibal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Montani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Van Landeghem</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Boyd</surname></persName>
		</author>
		<idno type="DOI">10.5281/zenodo.1212303</idno>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b29">
	<monogr>
		<title level="m" type="main">Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">P</forename><surname>Kincaid</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">P</forename><surname>Fishburne</surname><genName>Jr</genName></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">L</forename><surname>Rogers</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">S</forename><surname>Chissom</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1975">1975</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b30">
	<analytic>
		<title level="a" type="main">In defense of the fog index</title>
		<author>
			<persName><forename type="first">J</forename><surname>Bogert</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">The Bulletin of the Association for Business Communication</title>
		<imprint>
			<biblScope unit="volume">48</biblScope>
			<biblScope unit="page" from="9" to="12" />
			<date type="published" when="1985">1985</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b31">
	<analytic>
		<title level="a" type="main">Smog grading-a new readability formula</title>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">H</forename><surname>McLaughlin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of reading</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page" from="639" to="646" />
			<date type="published" when="1969">1969</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b32">
	<monogr>
		<title level="m" type="main">Automated readability index</title>
		<author>
			<persName><forename type="first">R</forename><surname>Senter</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">A</forename><surname>Smith</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1967">1967</date>
		</imprint>
	</monogr>
	<note type="report_type">DTIC document</note>
</biblStruct>

<biblStruct xml:id="b33">
	<analytic>
		<title level="a" type="main">A computer readability formula designed for machine scoring</title>
		<author>
			<persName><forename type="first">M</forename><surname>Coleman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">L</forename><surname>Liau</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Applied Psychology</title>
		<imprint>
			<biblScope unit="volume">60</biblScope>
			<biblScope unit="page">283</biblScope>
			<date type="published" when="1975">1975</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b34">
	<analytic>
		<title level="a" type="main">Lix and rix: Variations on a little-known readability index</title>
		<author>
			<persName><forename type="first">J</forename><surname>Anderson</surname></persName>
		</author>
		<ptr target="http://www.jstor.org/stable/40031755" />
	</analytic>
	<monogr>
		<title level="j">Journal of Reading</title>
		<imprint>
			<biblScope unit="volume">26</biblScope>
			<biblScope unit="page" from="490" to="496" />
			<date type="published" when="1983">1983</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b35">
	<analytic>
		<title level="a" type="main">Antique: A non-factoid question answering benchmark</title>
		<author>
			<persName><forename type="first">H</forename><surname>Hashemi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Aliannejadi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Zamani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Croft</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">ECIR</title>
				<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b36">
	<analytic>
		<title level="a" type="main">Simplified data wrangling with ir_datasets</title>
		<author>
			<persName><forename type="first">S</forename><surname>Macavaney</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Yates</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Feldman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Downey</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Cohan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Goharian</surname></persName>
		</author>
		<idno type="DOI">10.1145/3404835.3463254</idno>
		<idno>doi:10.1145/3404835.3463254</idno>
		<ptr target="https://doi.org/10.1145/3404835.3463254" />
	</analytic>
	<monogr>
		<title level="m">SIGIR &apos;21: The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event</title>
				<editor>
			<persName><forename type="first">F</forename><surname>Diaz</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">C</forename><surname>Shah</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">T</forename><surname>Suel</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Castells</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Jones</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">T</forename><surname>Sakai</surname></persName>
		</editor>
		<meeting><address><addrLine>Canada</addrLine></address></meeting>
		<imprint>
			<publisher>ACM</publisher>
			<date type="published" when="2021">July 11-15, 2021</date>
			<biblScope unit="page" from="2429" to="2436" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b37">
	<analytic>
		<title level="a" type="main">Continuous Integration for Reproducible Shared Tasks with TIRA</title>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Wiegmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Kolyada</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Grahm</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Elstner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Loebe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<idno type="DOI">10.1007/978-3-031-28241-6_20</idno>
		<idno>doi:10.1007/978-3-031-28241-6_20</idno>
		<ptr target="https://doi.org/10.1007/978-3-031-28241-6_20" />
	</analytic>
	<monogr>
		<title level="m">Advances in Information Retrieval. 45th European Conference on IR Research (ECIR 2023)</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<editor>
			<persName><forename type="first">J</forename><surname>Kamps</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">L</forename><surname>Goeuriot</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">F</forename><surname>Crestani</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Maistro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Joho</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Davis</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">C</forename><surname>Gurrin</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">U</forename><surname>Kruschwitz</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Caputo</surname></persName>
		</editor>
		<meeting><address><addrLine>Berlin Heidelberg New York</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="236" to="241" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b38">
	<monogr>
		<title level="m" type="main">spaCy: Industrial-strength Natural Language Processing in Python</title>
		<author>
			<persName><forename type="first">I</forename><surname>Montani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Honnibal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Boyd</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Van Landeghem</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Peters</surname></persName>
		</author>
		<idno type="DOI">10.5281/zenodo.10009823</idno>
		<ptr target="https://zenodo.org/records/10009823" />
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b39">
	<analytic>
		<title level="a" type="main">Declarative experimentation in information retrieval using pyterrier</title>
		<author>
			<persName><forename type="first">C</forename><surname>Macdonald</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Tonellotto</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2020 ACM SIGIR on International Conference on Theory of Information Retrieval</title>
				<meeting>the 2020 ACM SIGIR on International Conference on Theory of Information Retrieval</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="161" to="168" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b40">
	<analytic>
		<title level="a" type="main">Transformers: State-of-the-Art Natural Language Processing</title>
		<author>
			<persName><forename type="first">T</forename><surname>Wolf</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Debut</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Sanh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chaumond</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Delangue</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Moi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Cistac</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Rault</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Louf</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Funtowicz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Davison</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Shleifer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Von Platen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Ma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Jernite</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Plu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Xu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Le Scao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Gugger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Drame</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Q</forename><surname>Lhoest</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Rush</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2020.emnlp-demos.6</idno>
		<ptr target="https://aclanthology.org/2020.emnlp-demos.6" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, Association for Computational Linguistics</title>
				<editor>
			<persName><forename type="first">Q</forename><surname>Liu</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><surname>Schlangen</surname></persName>
		</editor>
		<meeting>the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="38" to="45" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b41">
	<analytic>
		<title level="a" type="main">PyTorch: An Imperative Style, High-Performance Deep Learning Library</title>
		<author>
			<persName><forename type="first">A</forename><surname>Paszke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Gross</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Massa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Lerer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bradbury</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Chanan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Killeen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Lin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Gimelshein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Antiga</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Desmaison</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Kopf</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Devito</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Raison</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Tejani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Chilamkurthy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Steiner</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Fang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bai</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Chintala</surname></persName>
		</author>
		<ptr target="http://papers.neurips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf" />
	</analytic>
	<monogr>
		<title level="m">Advances in Neural Information Processing Systems 32</title>
				<editor>
			<persName><forename type="first">H</forename><surname>Wallach</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">H</forename><surname>Larochelle</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><surname>Beygelzimer</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">F</forename><surname>Buc</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Fox</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Garnett</surname></persName>
		</editor>
		<imprint>
			<publisher>Curran Associates, Inc</publisher>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="8024" to="8035" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
