<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Team cnlp-nits-pp at PAN: Leveraging BERT for Accurate Authorship Verification: A Novel Approach to Textual Attribution Notebook for the PAN Lab at CLEF 2024</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Annepaka</forename><surname>Yadagiri</surname></persName>
							<email>annepaka22rs@cse.nits.ac.in</email>
							<affiliation key="aff0">
								<orgName type="department">Computer Science &amp; Engineering</orgName>
								<orgName type="institution">National Institute of Technology</orgName>
								<address>
									<settlement>Silchar</settlement>
									<region>Assam</region>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Dimpal</forename><surname>Kalita</surname></persName>
							<email>kalitadimpal112@gmail.com</email>
							<affiliation key="aff0">
								<orgName type="department">Computer Science &amp; Engineering</orgName>
								<orgName type="institution">National Institute of Technology</orgName>
								<address>
									<settlement>Silchar</settlement>
									<region>Assam</region>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Abhishek</forename><surname>Ranjan</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">Computer Science &amp; Engineering</orgName>
								<orgName type="institution">National Institute of Technology</orgName>
								<address>
									<settlement>Silchar</settlement>
									<region>Assam</region>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Ashish</forename><surname>Kumar Bostan</surname></persName>
							<email>kumarashishbostan@gmail.com</email>
							<affiliation key="aff0">
								<orgName type="department">Computer Science &amp; Engineering</orgName>
								<orgName type="institution">National Institute of Technology</orgName>
								<address>
									<settlement>Silchar</settlement>
									<region>Assam</region>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Parthib</forename><surname>Toppo</surname></persName>
							<email>toppoparthib@gmail.com</email>
							<affiliation key="aff0">
								<orgName type="department">Computer Science &amp; Engineering</orgName>
								<orgName type="institution">National Institute of Technology</orgName>
								<address>
									<settlement>Silchar</settlement>
									<region>Assam</region>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Partha</forename><surname>Pakray</surname></persName>
							<email>partha@cse.nits.ac.in</email>
							<affiliation key="aff0">
								<orgName type="department">Computer Science &amp; Engineering</orgName>
								<orgName type="institution">National Institute of Technology</orgName>
								<address>
									<settlement>Silchar</settlement>
									<region>Assam</region>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Team cnlp-nits-pp at PAN: Leveraging BERT for Accurate Authorship Verification: A Novel Approach to Textual Attribution Notebook for the PAN Lab at CLEF 2024</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">FBC1081BFBDEFA7306DA78E7374C9D63</idno>
					<idno type="arXiv">arXiv:2310.05130</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T17:54+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Large Language Models</term>
					<term>AI-Generated Content Detection</term>
					<term>Natural Language Processing</term>
					<term>Generative AI</term>
					<term>BERT</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>The launch of ai-generated tools has attracted a lot of interest from the academic and business worlds. Effectively handling a broad spectrum of human inquiries, ai-generated tools offer clear, thorough responses that far outperform earlier open-source chatbots regarding security and use. People are interested in learning how powerful AI is and how far it has come from human specialists. However, concerns about the possible detrimental effects large language models like ChatGPT can have on society-including fake news, plagiarism, and social security problems-are beginning to surface. In this work, The dataset is provided from CLEF PAN-24 humanwritten text data and 13 different types of ai-generated models text data like alpaca-7b,bigscience-bloomz-7b1, chavinlo-alpaca-13b, Gemini-pro, gpt-3.5-turbo-0125,gpt-4-turbo-preview,meta-llama-llama-2-7b-chat-hf,metallama-llama-2-70b-chat-hf,mistralai-mistral-7b-instruct-v0.2,mistralai-mixtral-8x7b-instruct-v0.1,qwen-qwen1.5-72b-chat-8bit,text-bison-002,vicgalle-gpt2-open-instruct-v1. which approximately provides imbalanced data. The comparison of human-written and ai-generated data. We examine the features of ChatGPT's replies, the distinctions and shortcomings of human experts, and the prospects for LLMs based on the pan-24 dataset. We conducted extensive human assessments and linguistic examinations of ai-generated content compared to human content, yielding several intriguing findings. Then, we conduct in-depth research on the best ways to identify whether a given text was produced by ai-generated or humans. We construct three distinct detection systems, investigate critical variables affecting their performance, and test them in various contexts. Our solution approach for this task involves using the BERT model with a preprocessing model, where we achieved classification results with over 97.6% ROC-AUC for all the results included in this challenge.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>AI text. Moreover, detection models trained on specific text types may not perform well on others, requiring extensive retraining and resources. Ethical and practical concerns also arise, such as the risk of false positives and negatives, privacy issues in data analysis, and the ongoing need to adapt to new AI techniques. Addressing these issues involves continuous advancements in detection algorithms and comprehensive research efforts.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Dataset Description</head><p>This section outlines the classification methods and specific model training approaches, Section 3.2 discusses the model's overall structure and Section 3.3 focuses on the key points of model training. The dataset, acquired via CLEF 2024 PAN <ref type="bibr" target="#b0">[1]</ref>, consists of about 1,087 rows of text composed by humans and approximately 14,131 rows of text produced by AI. The text comprises a combination of authentic and fraudulent news stories from different 2021 U.S. news headlines. Initially, the dataset contained numerous JSON encodings, which were removed in the first step. During further analysis of the cleaned dataset, NAN values were identified. These were addressed by consolidating all data into a single data frame. Using linguistic analysis, the text column extracted features such as average line length, vocabulary, word density, and POS tags. This provides an overview of the data processing steps, as shown in Figure <ref type="figure" target="#fig_0">1</ref>. From this dataset, we extracted feature statistics. Table <ref type="table" target="#tab_0">1</ref> represents statistics and feature extraction data.  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">System Overview</head><p>This section examines the linguistic differences between human-written and AI-generated texts. Next, the performance of existing detection algorithms is assessed using the PAN-24 dataset <ref type="bibr" target="#b1">[2]</ref>. Finally, the criteria used by deep learning-based detection methods are investigated.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Vocabulary Features</head><p>This section examines the vocabulary characteristics of the PAN-24 dataset. The study is focused on the word choices made by AI-generated text and humans when responding to identical queries. Given the diversity of texts written by humans and AI, these differences are analyzed during the statistical procedure. The following traits were computed: in addition to lexicon measure (V), which measures the total number of unique words used in all responses, and average length (L), which measures the average number of words in each text, an additional characteristic named word density (D) is proposed. Word density is determined by the formula D = 100 × V / (L × N), where N is the number of answers. Density quantifies the degree to which words are employed intensively in a text. For instance, if 1,000 words of the text are published but only 100 distinct words are used, the density is 100 × 100 / 1,000 = 10. The higher the density, the more different words are used in the same text length. <ref type="bibr" target="#b2">[3]</ref>. Lexical analysis Within the domain of NLP, every word can be categorized into one of several lexical categories. The part-of-speech (POS) tagging task aims to identify each word's grammatical class within a given phrase. In this section, the lexical distributions of various AI-generated and human texts in the PAN-24 dataset are computed using the POS module in NLTK <ref type="bibr" target="#b3">[4]</ref>. The data is then arranged according to lexical percentage. As illustrated in Figures <ref type="figure" target="#fig_2">2 and 3</ref>, various parts of speech are displayed. Figures <ref type="figure">4 and 5</ref> present punctuation and adposition tags, respectively. Finally, Figures <ref type="figure">6 and 7</ref> show determiners and pronouns. The statistics for the top ten lexical categories are displayed. 
Nouns (NOUN) make up the largest proportion of all lexical categories, while punctuation (PUNCT), verbs (VERB), adpositions (ADP), adjectives (ADJ), and determiners (DET) constitute most of the remaining categories.</p><p>When comparing human-written texts to AI-generated texts, the following observations can be made: AI-generated texts have higher proportions of nouns (NOUN), verbs (VERB), determiners (DET), adjectives (ADJ), auxiliaries (AUX), coordinating conjunctions (CCONJ), and particles (PART) than human-written texts. This suggests that the rich knowledge embedded in AI-generated texts offers a more varied vocabulary, enhancing their informativeness.</p><p>Human-written texts contain higher proportions of adverbs (ADV) and punctuation (PUNCT) than AI-generated texts. This indicates that humans prioritize structure, consistency, and logical flow, in which AI-generated texts are comparatively weaker.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Model</head><p>A BERT-based sequence classification <ref type="bibr" target="#b4">[5]</ref> and transformer-based model designed to understand the context of a word in search queries. Unlike traditional models that process text sequentially (either leftto-right or right-to-left), BERT considers the entire sequence of words simultaneously. This bidirectional approach allows BERT to grasp the context of a word based on its surrounding words, leading to better performance in NLP tasks. Key Features of BERT:</p><p>• Bidirectional Training: BERT uses a Transformer architecture that reads text bi-directionally.</p><p>This helps the model understand the context of each word more comprehensively. • Pre-training and Fine-tuning: BERT involves two main stages:</p><p>-Pre-training: The model is trained on a large corpus of text, learning to predict missing words in sentences (Masked Language Model) and the next sentence (Next Sentence Prediction). -Fine-tuning: The pre-trained BERT model is then fine-tuned on tasks such as text classification, named entity recognition, or question answering using task-specific data.</p><p>Our team plans to extract features from the original dataset, including the text and numerical columns. Initially, this dataset was utilized as training data for 3 epochs to train a new model, referred to as model A, using BERT. BERT, an enhanced version of previous models, incorporates a more significant number of parameters, more extensive training data, and larger batch sizes. It is trained significantly more significantly than CNN-BILSTM, which takes considerably longer. This extensive training allows BERT representations to generalize more effectively to downstream tasks and deliver superior performance compared to other models. As a result, the BERT model demonstrates high accuracy and faster processing speeds, as illustrated in Figure <ref type="figure">.</ref> 8.    
• __init__: Initializes the dataset with text, numerical data, and labels, converting numerical data and labels to tensors. • __len__: Returns the length of the dataset.</p><p>• __getitem__: Tokenizes text data, processes numerical features, and returns a dictionary with input IDs, attention mask, and label for a given index.  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3.1.">Execution Steps</head><p>We have written software that can be run from the command line. An input file (an absolute path to the input JSONL file) and an output directory (an absolute path to the location where the results will be written) are the two arguments that the script requires. We execute the command as follows in the terminal:</p><formula xml:id="formula_0">python3 model.py &lt;input_file_path&gt; &lt;output_directory&gt;</formula><p>Here, model.py is the main Python file that loads and runs the model. The &lt;input_file_path&gt; is the path of the file containing the input texts, and the &lt;output_directory&gt; is the directory where the output file is saved. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4.">Hyperparameters</head><p>The precise adjustments a user makes to control the learning process are known as hyperparameters.</p><p>The best/optimal hyperparameters for learning algorithms must be selected during training to yield the most meaningful results. The hyperparameters used in our recommended techniques are shown in Table <ref type="table" target="#tab_2">2</ref> we selected these values by analyzing the performance of the suggested methods with different combinations of hyperparameters. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.5.">Features Extracted</head><p>Feature extraction in NLP involves transforming raw text data into a structured representation that machine learning algorithms can use for various NLP tasks. The following features were extracted, and our model was trained on those parameters.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.5.1.">Average Line Length</head><p>In NLP, average line length is the mean number of characters or words per line in a text dataset like PAN-24. A sample text has been taken from this dataset. For example,</p><p>• Text: "President Joseph R. Biden Jr. calls for unity and a renewed commitment to democracy".</p><p>• Average characters per line: 74 • Average words per line: 12</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.5.2.">Vocabulary</head><p>In NLP, vocabulary (vocab) refers to the set of unique words or tokens in a text dataset like PAN-24. A sample text has been taken from this dataset. For example,</p><p>• Text: "Biden's inauguration is impacted by the pandemic and security threats. "</p><p>• Vocabulary: "Biden's," "inauguration," "is," "impacted," "by," "the," "pandemic," "and," "security," "threats" • Size of vocabulary: 10</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.5.3.">Word Density</head><p>In NLP, word density measures how many unique words (vocabulary) appear per unit of text, calculated as 100 times the vocabulary size divided by the product of the number of lines and the average line length.</p><p>A sample text has been taken from this dataset. For example, "A new chapter of American democracy begins amidst unprecedented times. "</p><p>Step-by-Step Calculation:</p><p>• Vocabulary:</p><p>-Unique words: "A," "new," "chapter," "of," "American," "democracy," "begins," "amidst," "unprecedented, " "times" -Vocabulary size: 10</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>• Number of lines:</head><p>-There is 1 line in the text.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>• Average line length:</head><p>-Line 1: "A new chapter of American democracy begins amidst unprecedented times. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.5.4.">POS Tags</head><p>Part-of-speech (POS) tags are labels assigned to each word in a text to indicate its grammatical category, such as noun, verb, adjective, etc. POS tagging is a fundamental task in NLP that helps understand sentences' syntactic structure and meaning. Explanation of POS Tags: -Examples: "quickly, " "very, " "well. " -Usage: "He ran quickly. " • Particles -Definition: Particles are small words with grammatical functions that do not fit into other categories. -Examples: "to" (in "to go"), "not" (in "do not") -Usage: "She decided to go. " • Subordinating conjunctions -Definition: Subordinating conjunctions connect clauses to show a relationship between them. -Examples: "because, " "although, " "if" -Usage: "She stayed home because it was raining. " • Numerals -Definition: Numerals are words that represent numbers.</p><p>-Examples: "one, " "two, " "third. " -Usage: "She has two cats. " • X -Definition: Other categories of words that do not fit into the standard parts of speech.</p><p>-Examples: Foreign words, typos -Usage: "She said 'ciao' as she left. "</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.6.">Implementation</head><p>There are three major steps of our implementation as follows:</p><p>• Tokenization and Model Loading: This part sets up the tokenizer and the model into 19 distinct features as shown in Table <ref type="table" target="#tab_0">1</ref>. From among these features, only suitable features, The tokenizer and model configuration, are loaded from the 'bert-base-uncased' pre-trained model, and the actual model weights are loaded from a specified path. The model is set to evaluation mode and moved to the appropriate device (CPU or GPU). • TextDetector Class: This class takes a text string as input tokenizes it, and then uses the model to get the logits (Logits are a neural network model's raw, unnormalized outputs). The logits are converted to probabilities using the softmax function. It assumes a binary classification model and returns the second class's probability (index 1). • Comparative Score Function:</p><formula xml:id="formula_1">comparative_score(score1, score2, epsilon=1e-3)</formula><p>This function compares two scores with a small threshold (epsilon) to avoid floating-point precision issues. It returns a value between 0 and 1 based on the comparison:</p><p>-Returns a value between 0.5 and 1 if the first score is significantly higher.</p><p>-Returns a value between 0 and 0.5 if the second score is significantly higher. -Returns 0.5 if the scores are very close (within epsilon).</p><p>In the final function of calculating the result, it reads the line and parses it as JSON, then extracts the two texts (text1 and text2) and computes scores for both texts. It uses a comparative score function to determine a final score. Finally, the results are written in a JSONL file in the specified output directory.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Results</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 3</head><p>Overview of the mean accuracy over 9 variants of the test set. We report the minimum, median, maximum, the 25th, and the 75th quantile, of the mean per the 9 datasets. direct-velocity is the name of the LLM model implemented in this paper. The submission scores 8th out of 30 on the PAN CLEF generated content analysis leaderboard. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Approach</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Evaluation Metrics</head><p>Systems are assessed using the PAN authorship verification tasks as a benchmark. The metrics listed below are employed:</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.1.">ROC-AUC</head><p>The area under the receiver operating characteristic (ROC) curve. The ROC curve plots the true positive rate against the false positive rate at different threshold settings, and the area under it ("Area Under the Curve") summarizes performance across all thresholds. Higher numbers indicate better discrimination performance. It offers an overall assessment of a model's capacity to distinguish between positive and negative classes.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.2.">Brier</head><p>The Brier score's complement (mean squared loss). For binary classification problems, the Brier score calculates the mean squared difference between the expected probability and the actual result (0 or 1). Lower Brier ratings indicate better calibration and accuracy of the probability predicted by the model.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.3.">C@1</head><p>A modified accuracy score that uses the average accuracy of the remaining instances to assign non-answers (score = 0.5). C@1 quantifies the percentage of cases in which the model's top-ranked prediction corresponds with the ground truth label. It is a typical assessment metric for recommendation or information retrieval systems.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.4.">F1</head><p>The harmonic mean between recall and precision. The F1 score is calculated by taking the harmonic mean of the two variables: recall, the ratio of true positive predictions to all actual positives, and precision, which is the ratio of true positive predictions to all predicted positives. Better performance is indicated by higher numbers, which strike a balance between recall and precision.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.5.">F0.5u</head><p>A precision-weighted F measure (modified F0.5 measure) that considers non-answers (score = 0.5) to be false negatives. The F0.5 score is comparable to the F1 score but weights precision more heavily than recall. It can be helpful when recall is less important than precision, such as in situations where false positives are more expensive than false negatives.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.6.">Mean</head><p>The average of all of the preceding measurements. The mean score indicates the average performance across all samples or occurrences in the evaluation dataset. These metrics collectively provide insights into different aspects of model performance, including discrimination ability, calibration, accuracy, ranking quality, and the balance between precision and recall.</p><p>Table <ref type="table">3</ref> shows the results, initially pre-filled with the official baselines provided by the PAN organizers.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Baseline Models</head><p>Baseline models are simple reference models used to establish a benchmark for evaluating the performance of more complex models in machine learning and natural language processing tasks. These models provide a standard or point of comparison, allowing researchers and practitioners to assess whether new models offer improvements in accuracy, efficiency, or other relevant metrics. By comparing against baseline models, it is possible to quantify the gains achieved by novel techniques and ensure that the advancements are meaningful and not merely coincidental. Six LLM detection baselines are used as references for the model results. These six LLM detection baselines are re-implementations from the original papers: • Baseline Binoculars <ref type="bibr" target="#b5">[6]</ref> • Baseline DetectGPT <ref type="bibr" target="#b6">[7]</ref> • Baseline PPMd <ref type="bibr" target="#b7">[8]</ref> • Baseline Unmasking <ref type="bibr" target="#b8">[9]</ref> • Baseline Fast-DetectGPT <ref type="bibr" target="#b9">[10]</ref> </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusion</head><p>This paper presents a bootstrap dataset of actual and false news items encompassing multiple 2021 U.S. news headlines, using the shared task on the PAN-24 dataset, which includes almost 1087 rows of human-written text. And different in almost 13 LLMs with 14181 rows( ai-generated text). Based on the PAN-24 dataset, we conduct broad considers counting human-written content assessments, phonetic investigation, and ai-generated content discovery tests. The human-written content assessments and phonetics analysis provide us with knowledge about the specific contrasts between human-written content and AI-generated text, which persuade our considerations of LLMs' future headings. The ai-generated content substance detection experiments outline a few imperative conclusions that can give advantageous guides to the research and improvement of AIGC-detection instruments. We make all our data, code, and models publicly available to facilitate related research and applications at our git hub repository AI vs Human</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Data Processing steps</figDesc><graphic coords="2,94.57,342.94,406.15,57.00" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Pos tag Noun</figDesc><graphic coords="4,80.79,65.61,203.07,122.41" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Pos tag Verb</figDesc><graphic coords="4,311.41,74.47,203.08,113.32" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Figure 4 : 5 :</head><label>45</label><figDesc>Figure 4: Pos tag Punctuation Figure 5: Pos tag Adposition</figDesc><graphic coords="4,80.79,230.42,203.07,114.26" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head>Figure 6 : 7 :</head><label>67</label><figDesc>Figure 6: Pos Tag Determiner Figure 7: Pos tag Pronoun</figDesc><graphic coords="4,80.79,387.88,203.07,111.36" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_5"><head>then</head><label></label><figDesc>Load a pre-trained BERT tokenizer and model from Hugging Face's model hub. Create instances of the CustomDataset class for the training and validation sets. Create data loaders for the training</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head>Figure 8 :</head><label>8</label><figDesc>Figure 8: overall prediction process</figDesc><graphic coords="5,162.25,65.60,270.78,362.26" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_7"><head>Figure 9 :</head><label>9</label><figDesc>Figure 9: Model Training Process</figDesc><graphic coords="6,93.32,65.60,406.16,147.49" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_8"><head></head><label></label><figDesc>" (70 characters) -Average line length: 70 characters • Word Density Calculation: The word density (𝑊 𝐷) can be calculated using the formula: 𝑊 𝐷 = 100 × Vocabulary Size No of Lines × Average Line Length (1) Where: 𝑊 𝐷 : Word Density Vocabulary Size : Number of unique words in the text No of Lines : Total number of lines in the text Average Line Length : Average number of characters per line So, the word density of the text is approximately 14.29.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_9"><head>-</head><label></label><figDesc>Definition: Words representing people, places, things, or ideas. -Examples: "cat, " "city, " "happiness. " -Usage: "The cat is sleeping. " • Verb -Definition: Words that describe actions, states, or occurrences. -Examples: "run, " "is, " "seem. " -Usage: "She runs every morning. " • Punctuation -Definition: Symbols used to separate sentences and their elements and to clarify meaning. -Examples: ". ", ", ", "!" -Usage: "Hello, world!" • Determiner -Definition: Determiners are words placed before nouns to specify quantity or definiteness. -Examples: "the, " "a, " "some. " -Usage: "The apple is red. " • Pronoun -Definition: Pronouns are words that replace nouns. -Examples: "he, " "they, " "it. " -Usage: "She loves her dog. " • Proper Noun -Definition: Proper nouns are specific names of people, places, or organizations. -Examples: "John, " "Paris, " "Google. " -Usage: "Google is a search engine. " • Adjective -Definition: Adjectives are words that describe or modify nouns. -Examples: "happy, " "blue, " "tall. " -Usage: "The tall building is new. " • Auxiliary Verb -Definition: Auxiliary verbs are used with main verbs to express tense, mood, or voice. -Examples: "is, " "have, " "will. " -Usage: "She is running. " • Adverb -Definition: Adverbs modify verbs, adjectives, or other adverbs.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>The following table represents pan-24 dataset statistics and feature extraction data.</figDesc><table><row><cell>Model</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Dataset size Word density Vocab Avg line length NOUN PUNC VERB ADP DET PRON ADJ AUX ADV CCONJ PROPN PART SCONJ NUM</head><label></label><figDesc></figDesc><table><row><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell></cell><cell>X</cell><cell>INTJ</cell></row><row><cell>human</cell><cell>1087</cell><cell>4.56</cell><cell>29820</cell><cell>27.39</cell><cell>17.17</cell><cell>10.61</cell><cell cols="2">14.0 10.25 8.6</cell><cell>3.94</cell><cell cols="3">5.77 0.81 3.23</cell><cell>2.59</cell><cell>10.64</cell><cell>0.29</cell><cell>10.25</cell><cell>1.72 0.012 0.013</cell></row><row><cell>alpaca-7b</cell><cell>1087</cell><cell>4.79</cell><cell>14409</cell><cell>30.82</cell><cell>16.38</cell><cell>10.52</cell><cell>9.04</cell><cell>9.88 8.63</cell><cell>1.98</cell><cell cols="3">5.37 0.76 1.83</cell><cell>2.99</cell><cell>15.99</cell><cell>0.12</cell><cell>9.88</cell><cell>1.4 0.0045 0.0065</cell></row><row><cell>bigscience-bloomz-7b1</cell><cell>1087</cell><cell>3.32</cell><cell>13938</cell><cell>24.18</cell><cell>17.66</cell><cell>8.72</cell><cell cols="2">10.54 11.06 9.41</cell><cell>2.7</cell><cell cols="2">5.36 0.7</cell><cell>2.03</cell><cell>2.35</cell><cell>15.48</cell><cell>0.19</cell><cell>11.06</cell><cell>2.1</cell><cell>0.005 0.0011</cell></row><row><cell>chav-inlo-alpaca-13b</cell><cell>1087</cell><cell>5.60</cell><cell>14476</cell><cell>28.82</cell><cell>16.37</cell><cell>9.07</cell><cell cols="2">10.17 9.97 8.43</cell><cell>1.95</cell><cell>5.92</cell><cell>1</cell><cell>2.06</cell><cell>3.15</cell><cell>20.28</cell><cell>0.1</cell><cell>9.97</cell><cell>1.48 0.0009 
0.0009</cell></row><row><cell>gemini-pro</cell><cell>1087</cell><cell>4.15</cell><cell>24347</cell><cell>22.33</cell><cell>17.97</cell><cell>9.41</cell><cell cols="2">11.17 9.37 8.37</cell><cell>1.95</cell><cell cols="3">5.92 0.71 2.05</cell><cell>2.77</cell><cell>11.67</cell><cell>0.12</cell><cell>9.37</cell><cell>1.85 0.001 0.0013</cell></row><row><cell>gpt-3.5-turbo-0125</cell><cell>1087</cell><cell>4.57</cell><cell>22776</cell><cell>28.92</cell><cell>20.51</cell><cell>8.61</cell><cell cols="2">12.48 11.37 9.51</cell><cell>2.36</cell><cell cols="3">6.83 0.46 2.14</cell><cell>2.58</cell><cell>10.61</cell><cell>0.13</cell><cell>11.37</cell><cell>0.95 0.001 0.0011</cell></row><row><cell>gpt-4-turbo-preview</cell><cell>1087</cell><cell>3.99</cell><cell>26175</cell><cell>28.26</cell><cell>20.56</cell><cell>9.94</cell><cell cols="2">12.02 10.35 9.56</cell><cell>2.81</cell><cell cols="3">6.91 0.47 2.26</cell><cell>2.6</cell><cell>11.37</cell><cell>0.17</cell><cell>10.35</cell><cell>0.9 0.0014 0.0013</cell></row><row><cell>metallama 2-7b</cell><cell>1087</cell><cell>3.88</cell><cell>21431</cell><cell>26.58</cell><cell>19.07</cell><cell cols="3">10.28 10.88 8.99 9.99</cell><cell>3.66</cell><cell cols="3">5.98 0.74 2.21</cell><cell>3.14</cell><cell>9.13</cell><cell>0.12</cell><cell>8.99</cell><cell>1.4</cell><cell>0.003 0.0013</cell></row><row><cell>metallama-270b</cell><cell>1087</cell><cell>3.52</cell><cell>22422</cell><cell>25.16</cell><cell>18.52</cell><cell>9.98</cell><cell>12.3</cell><cell>9.39 8.98</cell><cell>3.6</cell><cell cols="3">5.91 0.62 2.26</cell><cell>3.04</cell><cell>11.56</cell><cell>0.12</cell><cell>9.39</cell><cell>1.15 0.0064 0.0032</cell></row><row><cell>mistralai-mistral-7b</cell><cell>1087</cell><cell>3.62</cell><cell>25147</cell><cell>24.67</cell><cell>18.3</cell><cell>11.15</cell><cell>9.55</cell><cell>9.76 8.43</cell><cell>3.2</cell><cell cols="3">5.62 0.67 
2.26</cell><cell>3.04</cell><cell>11.56</cell><cell>0.12</cell><cell>9.76</cell><cell>1.24 0.0064 0.0032</cell></row><row><cell>mistralai-mixtral-8x7b</cell><cell>1087</cell><cell>3.92</cell><cell>26549</cell><cell>26.28</cell><cell>19.14</cell><cell cols="3">10.98 10.02 9.35 9.56</cell><cell>3.67</cell><cell cols="3">5.62 0.62 2.22</cell><cell>3.06</cell><cell>11.56</cell><cell>0.12</cell><cell>9.35</cell><cell>1.24 0.006 0.0024</cell></row><row><cell>qwen-qwen1.5-72b</cell><cell>1087</cell><cell>4.38</cell><cell>32658</cell><cell>27.84</cell><cell>18.19</cell><cell>10.56</cell><cell>10.6</cell><cell>9.7 9.36</cell><cell>3.16</cell><cell cols="2">5.35 0.59</cell><cell>2.1</cell><cell>3.05</cell><cell>11.44</cell><cell>0.1</cell><cell>9.7</cell><cell>1.51 0.003 0.0022</cell></row><row><cell>text-bison-002</cell><cell>1087</cell><cell>3.98</cell><cell>25960</cell><cell>26.01</cell><cell>19.16</cell><cell cols="3">10.56 12.83 9.22 9.19</cell><cell>3.66</cell><cell cols="3">6.65 0.59 2.23</cell><cell>2.88</cell><cell>11.44</cell><cell>0.1</cell><cell>9.22</cell><cell>1.01 0.0016 0.0016</cell></row><row><cell>vicugalle-gpt2-open-instruct-v1</cell><cell>1087</cell><cell>2.53</cell><cell>16920</cell><cell>30.03</cell><cell>17.68</cell><cell>9.41</cell><cell cols="2">12.8 10.45 9.35</cell><cell>3.16</cell><cell cols="3">5.9 0.69 2.02</cell><cell>3</cell><cell>13.32</cell><cell>0.13</cell><cell>10.45</cell><cell>1.56 0.0045 0.0047</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 2</head><label>2</label><figDesc>Hyperparameters that were applied to every experiment</figDesc><table><row><cell>Parameter</cell><cell>Value</cell></row><row><cell cols="2">Activation Function Sigmoid</cell></row><row><cell>Optimizer</cell><cell>AdamW</cell></row><row><cell>Loss Function</cell><cell>nn_crossentropy</cell></row><row><cell>Learning Rate</cell><cell>2 × 10 −5</cell></row><row><cell>Batch Size</cell><cell>32</cell></row><row><cell>Number of Epochs</cell><cell>3</cell></row><row><cell>Dropout</cell><cell>0.2</cell></row><row><cell>ModelCheckpoint</cell><cell>Yes</cell></row><row><cell>EarlyStopping</cell><cell>Yes</cell></row><row><cell>Patience</cell><cell>5</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Minimum 25-th Quantile Median 75-th Quantile Max</head><label></label><figDesc></figDesc><table><row><cell>direct-velocity</cell><cell>0.395</cell><cell>0.905</cell><cell>0.937</cell><cell>0.958</cell><cell>0.978</cell></row><row><cell>Baseline Binoculars</cell><cell>0.342</cell><cell>0.818</cell><cell>0.844</cell><cell>0.965</cell><cell>0.996</cell></row><row><cell>Baseline Fast-DetectGPT (Mistral)</cell><cell>0.095</cell><cell>0.793</cell><cell>0.842</cell><cell>0.931</cell><cell>0.958</cell></row><row><cell>Baseline PPMd</cell><cell>0.270</cell><cell>0.546</cell><cell>0.750</cell><cell>0.770</cell><cell>0.863</cell></row><row><cell>Baseline Unmasking</cell><cell>0.250</cell><cell>0.662</cell><cell>0.696</cell><cell>0.697</cell><cell>0.762</cell></row><row><cell>Baseline Fast-DetectGPT</cell><cell>0.159</cell><cell>0.579</cell><cell>0.704</cell><cell>0.719</cell><cell>0.982</cell></row><row><cell>95-th quantile</cell><cell>0.863</cell><cell>0.971</cell><cell>0.978</cell><cell>0.990</cell><cell>1.000</cell></row><row><cell>75-th quantile</cell><cell>0.758</cell><cell>0.865</cell><cell>0.933</cell><cell>0.959</cell><cell>0.991</cell></row><row><cell>Median</cell><cell>0.605</cell><cell>0.645</cell><cell>0.875</cell><cell>0.889</cell><cell>0.936</cell></row><row><cell>25-th quantile</cell><cell>0.353</cell><cell>0.496</cell><cell>0.658</cell><cell>0.675</cell><cell>0.711</cell></row><row><cell>Min</cell><cell>0.015</cell><cell>0.038</cell><cell>0.231</cell><cell>0.244</cell><cell>0.252</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 4</head><label>4</label><figDesc>Overview of the accuracy in detecting if a text is written by a human in task 4 on PAN 2024 (Voight-Kampff Generative AI Authorship Verification). We report ROC-AUC, Brier, C@1, F 1 , F 0.5𝑢 and their mean. direct-velocity is the name of the LLM model implemented in this paper. The submission scored 8th out of 30 on the PAN CLEF generated content analysis leaderboard.</figDesc><table><row><cell>Approach</cell><cell cols="3">ROC-AUC Brier C@1</cell><cell>F 1</cell><cell>F 0.5𝑢 Mean</cell></row><row><cell>direct-velocity</cell><cell>0.976</cell><cell cols="3">0.877 0.959 0.934 0.94 0.937</cell></row><row><cell>Baseline Binoculars</cell><cell>0.972</cell><cell cols="3">0.957 0.966 0.964 0.965 0.965</cell></row><row><cell>Baseline Fast-DetectGPT (Mistral)</cell><cell>0.876</cell><cell>0.8</cell><cell cols="2">0.886 0.883 0.883 0.866</cell></row><row><cell>Baseline PPMd</cell><cell>0.795</cell><cell cols="3">0.798 0.754 0.753 0.749 0.77</cell></row><row><cell>Baseline Unmasking</cell><cell>0.697</cell><cell cols="3">0.774 0.691 0.658 0.666 0.697</cell></row><row><cell>Baseline Fast-DetectGPT</cell><cell>0.668</cell><cell cols="3">0.776 0.695 0.69 0.691 0.704</cell></row><row><cell>95-th quantile</cell><cell>0.994</cell><cell cols="3">0.987 0.989 0.989 0.989 0.990</cell></row><row><cell>75-th quantile</cell><cell>0.969</cell><cell cols="3">0.925 0.950 0.933 0.939 0.941</cell></row><row><cell>Median</cell><cell>0.909</cell><cell cols="3">0.890 0.887 0.871 0.867 0.889</cell></row><row><cell>25-th quantile</cell><cell>0.701</cell><cell cols="3">0.768 0.683 0.657 0.670 0.689</cell></row><row><cell>Min</cell><cell>0.131</cell><cell cols="3">0.265 0.005 0.006 0.007 0.224</cell></row></table></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>We express our gratitude to the National Institute of Technology Silchar's Department of Computer Science and Engineering and the Center for Natural Language Processing (CNLP) for providing the necessary infrastructure and assistance for this study.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Overview of the &quot;Voight-Kampff&quot; Generative AI Authorship Verification Task at PAN and ELOQUENT</title>
		<author>
			<persName><forename type="first">J</forename><surname>Bevendorff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Wiegmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Karlgren</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Dürlich</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Gogoulou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Talman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Stamatatos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<ptr target="https://ceur-ws.org" /><!-- NOTE(review): original target was truncated to ".org"; publisher is CEUR-WS — verify the full proceedings URL -->
	</analytic>
	<monogr>
		<title level="m">Working Notes of CLEF 2024 - Conference and Labs of the Evaluation Forum</title>
		<title level="s">CEUR Workshop Proceedings</title>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><forename type="middle">G S</forename><surname>Herrera</surname></persName>
		</editor>
		<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Overview of PAN 2024: Multi-Author Writing Style Analysis, Multilingual Text Detoxification, Oppositional Thinking Analysis, and Generative AI Authorship Verification</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">A</forename><surname>Ayele</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Babakov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bevendorff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><forename type="middle">B</forename><surname>Casals</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Chulvi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Dementieva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Elnagar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Freitag</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Fröbe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Korenčić</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Mayerl</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Moskovskiy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Mukherjee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Panchenko</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Rangel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Rizwan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Rosso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Schneider</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Smirnova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Stamatatos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Stakovskii</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Taulé</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Ustalov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Wiegmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">M</forename><surname>Yimam</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Zangerle</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Experimental IR Meets Multilinguality, Multimodality, and Interaction. Proceedings of the Fifteenth International Conference of the CLEF Association (CLEF 2024)</title>
		<title level="s">Lecture Notes in Computer Science</title>
		<editor>
			<persName><forename type="first">L</forename><surname>Goeuriot</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Mulhem</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Quénot</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><surname>Schwab</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">L</forename><surname>Soulier</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><forename type="middle">M D</forename><surname>Nunzio</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">P</forename><surname>Galuščáková</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">A</forename><forename type="middle">G S</forename><surname>De Herrera</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">G</forename><surname>Faggioli</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Ferro</surname></persName>
		</editor>
		<meeting><address><addrLine>Berlin Heidelberg New York</addrLine></address></meeting>
		<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<monogr>
		<author>
			<persName><forename type="first">B</forename><surname>Guo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Jiang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Nie</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Ding</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Yue</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Wu</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2301.07597</idno>
		<title level="m">How close is chatgpt to human experts? comparison corpus, evaluation, and detection</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Nltk: the natural language toolkit</title>
		<author>
			<persName><forename type="first">S</forename><surname>Bird</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the COLING/ACL 2006 Interactive Presentation Sessions</title>
				<meeting>the COLING/ACL 2006 Interactive Presentation Sessions</meeting>
		<imprint>
			<date type="published" when="2006">2006</date>
			<biblScope unit="page" from="69" to="72" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Understanding stance classification of bert models: an attention-based framework</title>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">A C</forename><surname>Sáenz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Becker</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Knowledge and Information Systems</title>
		<imprint>
			<biblScope unit="volume">66</biblScope>
			<biblScope unit="page" from="419" to="451" />
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<monogr>
		<title level="m" type="main">Spotting llms with binoculars: Zero-shot detection of machine-generated text</title>
		<author>
			<persName><forename type="first">A</forename><surname>Hans</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Schwarzschild</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Cherepanova</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Kazemi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Saha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Goldblum</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Geiping</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Goldstein</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2401.12070</idno>
		<ptr target="https://arxiv.org/abs/2401.12070" />
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<monogr>
		<title level="m" type="main">Detectgpt: Zero-shot machinegenerated text detection using probability curvature</title>
		<author>
			<persName><forename type="first">E</forename><surname>Mitchell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Khazatsky</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Finn</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2301.11305</idno>
		<ptr target="https://arxiv.org/abs/2301.11305" />
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Compression and machine learning: a new perspective on feature space vectors</title>
		<author>
			<persName><forename type="first">D</forename><surname>Sculley</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Brodley</surname></persName>
		</author>
		<idno type="DOI">10.1109/DCC.2006.13</idno>
	</analytic>
	<monogr>
		<title level="m">Data Compression Conference (DCC&apos;06)</title>
				<imprint>
			<date type="published" when="2006">2006</date>
			<biblScope unit="page" from="332" to="341" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Generalizing unmasking for short texts</title>
		<author>
			<persName><forename type="first">J</forename><surname>Bevendorff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Stein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Hagen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Potthast</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/N19-1068</idno>
		<ptr target="https://aclanthology.org/N19-1068" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
		<title level="s">Long and Short Papers</title>
		<editor>
			<persName><forename type="first">J</forename><surname>Burstein</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">C</forename><surname>Doran</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">T</forename><surname>Solorio</surname></persName>
		</editor>
		<meeting>the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies<address><addrLine>Minneapolis, Minnesota</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="654" to="659" />
		</imprint>
	</monogr>
	<note>Association for Computational Linguistics</note>
</biblStruct>

<biblStruct xml:id="b9">
	<monogr>
		<title level="m" type="main">Fast-detectgpt: Efficient zero-shot detection of machinegenerated text via conditional probability curvature</title>
		<author>
			<persName><forename type="first">G</forename><surname>Bao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Teng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Zhang</surname></persName>
		</author>
		<ptr target="https://arxiv.org/abs/2310.05130" />
		<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
