<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Battling Hateful Content in Indic Languages HASOC &apos;21</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Aditya</forename><surname>Kadam</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Anmol</forename><surname>Goel</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jivitesh</forename><surname>Jain</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jushaan</forename><surname>Singh Kalra</surname></persName>
							<affiliation key="aff1">
								<orgName type="institution">Delhi Technological University</orgName>
								<address>
									<settlement>Delhi</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Mallika</forename><surname>Subramanian</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Manvith</forename><surname>Reddy</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Prashant</forename><surname>Kodali</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">T</forename><forename type="middle">H</forename><surname>Arjun</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Manish</forename><surname>Shrivastava</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Ponnurangam</forename><surname>Kumaraguru</surname></persName>
							<affiliation key="aff0">
								<orgName type="department">International Institute of Information Technology</orgName>
								<address>
									<settlement>Hyderabad</settlement>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff2">
								<orgName type="department">Forum for Information Retrieval Evaluation</orgName>
								<address>
									<addrLine>December 13-17</addrLine>
									<postCode>2021</postCode>
									<country key="IN">India</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Battling Hateful Content in Indic Languages HASOC &apos;21</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">8D6279535FC4E5A3B8DDFCC0AFDF6142</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-25T01:32+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Hate Speech</term>
					<term>Social Media</term>
					<term>Code Mixed</term>
					<term>Indic Languages</term>
					<term>Transformer Architecture</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>The extensive rise in consumption of online social media (OSMs) by a large number of people poses a critical problem of curbing the spread of hateful content on these platforms. With the growing usage of OSMs in multiple languages, the task of detecting and characterizing hate becomes more complex. The subtle variations of code-mixed texts along with switching scripts only add to the complexity. This paper presents a solution for the HASOC 2021 Multilingual Twitter Hate-Speech Detection challenge by team PreCog IIIT Hyderabad. We adopt a multilingual transformer based approach and describe our architecture for all 6 subtasks as part of the challenge. Out of the 6 teams that participated in all the subtasks, our submissions rank 3rd overall.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Dissemination of hateful content on nearly all social media is increasingly becoming an alarming concern. In the research community as well, this is a heavily studied research problem <ref type="bibr" target="#b0">[1,</ref><ref type="bibr" target="#b1">2,</ref><ref type="bibr" target="#b2">3,</ref><ref type="bibr" target="#b3">4,</ref><ref type="bibr" target="#b4">5]</ref>. Misconduct such as bullying, derogatory comments based on gender, race, religion, threatening remarks etc. are more prevalent today than ever before. The repercussions that such content can have are profound and can result in increased mental stress, emotional outbursts and negative psychological impacts <ref type="bibr" target="#b5">[6]</ref>. Hence, curbing the proliferation of this hate speech is imperative. Furthermore, the massive scale at which online social media platforms function makes it an even more pressing issue, which needs to be addressed in a robust manner. Most online social media platforms have imposed strict guidelines 1 2 3 to help prevent the spread of hate. In spite of these platform regulations, the dynamics of user-interaction influence the diffusion of (and hence increase in) hate to a large extent <ref type="bibr" target="#b0">[1]</ref>.</p><p>The problem of hate speech has been addressed by several researchers, but the rise in multilingual content has added to the complexity of identification of hateful content. The majority of these studies deal with high-resource languages such as English, and only recently have low-resource languages - such as several Indic Languages - been more deeply explored <ref type="bibr" target="#b6">[7]</ref>. 
In a country like India, with a multitude of regional languages, the phenomenon of Code Mixing/Switching (wherein linguistic units such as phrases/words of two languages occur in a single utterance) is also pervasive.</p><p>In this paper we elucidate our approach in solving the six downstream tasks of hate speech identification and characterization in Indian languages as a part of the 'HASOC '21 Hate Speech and Offensive Content Identification in English and Indo-Aryan Languages' challenge <ref type="bibr" target="#b7">[8]</ref>. Motivated by existing architectures, we curate our own pipeline by fusing fine-tuned transformer based models with additional features to solve this challenge and highlight the different methodologies that were adopted for the three languages - English, Hindi, Marathi - and Code Mixed Hindi-English. We also make our code, methodology and approach public to the research community. <ref type="foot" target="#foot_0">4</ref></p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Literature Review</head><p>Discerning hateful content on social media is an already tricky problem given the challenges associated with it, for instance disrespectful/abusive words could be censored in text, some expressions may not be inherently offensive, however they can be so in the right context <ref type="bibr" target="#b8">[9]</ref>. Owing to the conversational design of social media wherein users can reply to a given comment (either support, refute or irrelevant to the original message), the build-up of threads in response to a hateful message can also intensify hate even if the reply is not hateful on its own. The evolution of such hate intensity has shown diverse patterns and no direct correlation to the parent tweet which makes the task of hate speech detection more difficult <ref type="bibr" target="#b9">[10]</ref>.</p><p>Significant amount of research has been conducted to evaluate traditional NLP approaches such as character level CNNs, word embedding based approaches and the myriad of variations with LSTMs (sub-word level, hierarchical, BiLSTMs) <ref type="bibr" target="#b10">[11]</ref>. Likewise, Machine Learning algorithms including SVMs, K-Nearest Neighbours, Multinomial Naive Bayes (MNB) and their respective performances in multilingual text settings have also been explored <ref type="bibr" target="#b11">[12,</ref><ref type="bibr" target="#b6">7,</ref><ref type="bibr" target="#b12">13]</ref>. 
Investigating categories of profane words that are commonly used in hate speech is another non-trivial subtask under the hate detection umbrella, primarily because of the different interpretations of words in different cultures/demographics, adaptation of slangs in newer generations etc <ref type="bibr" target="#b13">[14]</ref>.</p><p>In recent times however, with the introduction of Transformer based models and their performance in Natural Language Understanding (NLU) tasks, significant work has been done in order to adapt these for multilingual texts as well to leverage transfer between languages. Models such as XLMR, mBERT, MuRIL, RemBERT have gained much popularity and have shown promising results <ref type="bibr" target="#b14">[15,</ref><ref type="bibr" target="#b15">16,</ref><ref type="bibr" target="#b16">17]</ref>. Transfer learning based approaches that leverage performance of high resource languages accompanied with CNN classification heads have also shown significant improvements in capturing hateful content on social media platforms <ref type="bibr" target="#b17">[18,</ref><ref type="bibr" target="#b18">19]</ref>. Sharing and re-utilizing the model weights learnt whilst training on a corpus for a high resource language can aid the process of training for languages that are still under explored <ref type="bibr" target="#b19">[20]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Dataset</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Dataset &amp; Task Description</head><p>Subtask 1 consisted of data for 3 languages, namely English, Hindi and Marathi <ref type="bibr" target="#b20">[21,</ref><ref type="bibr" target="#b21">22]</ref>. For English and Hindi, the task was further subdivided into 2 sub-parts: a) Identification of hateful v/s non-hateful content and b) Characterizing the kind of hate present in a tweet - either Profane, Hateful, Offensive or None. The distribution of the different data classes for each of the three languages is shown in Table <ref type="table">1</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 1</head><p>Distribution of the HASOC 2021 dataset for each task and its associated subtasks. For each language and task, the corresponding number of tweets per class is shown below. Tweets pertaining to Subtask 1 are in three languages -English, Hindi, Marathi. The focus of Subtask 2 was binary classification: Hate &amp; Offensive or Non Hate-Offensive. The given dataset distribution for this task is shown in Table <ref type="table">1</ref>. This data was accompanied by the following additions <ref type="bibr" target="#b22">[23]</ref>:</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Task</head><p>• Tweets are English-Hindi Code Mix sentences, and • Classification should not be based on the tweet alone, but should also account for the context.</p><p>For example: consider that in a tweet thread, tweet A is a reply to tweet B. For classifying tweet A, the model can leverage the information from the parent tweet - tweet B.</p><p>Figure <ref type="figure" target="#fig_6">3b</ref> demonstrates the relationship between the tweets to be classified and their contexts.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Preprocessing Data</head><p>As a precursor to applying any NLP models on text data, we pre-processed the dataset with standard techniques. Given that the data from Twitter is bound to have a certain amount of noise and unwanted elements such as URLs, mentions etc., these were removed from the tweet texts.</p><p>Hashtags have a slightly different contribution to analysis of the tweet since they may or may not contribute positively in the classification task. Through the results from our experiments, we observed that omitting the hashtags proved to work better, and hence they were cleaned from the tweet as well. Since the data is code mixed, not only in terms of the combination of languages but also with respect to scripts (some English text is written in Roman script, whereas some Hindi text is written in Devanagari apart from Roman), we also normalize the Indic language scripts for Marathi and Hindi. In addition to that, we removed stop words for the Marathi dataset using this list. 5 Finally, punctuation was also removed from the dataset texts. An interesting observation was that for the task of hate detection, the presence of emojis converted to text in the tweets did not improve the performance of our models significantly (rather it reduced the scores by some margin). However, including emojis along with text while classifying hate did have a positive impact since the emoji-text conversion was able to capture hints of sentiment and indirect offensive/profane content. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Methodology</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.1.">English Classifiers</head><p>For the English Subtask 1, the architecture that resulted in the best performance is an ensemble of the following models:</p><p>• Fine-tuned BERTweet model <ref type="bibr" target="#b23">[24]</ref> • Fine-tuned XLM-Roberta <ref type="bibr" target="#b15">[16]</ref> with CNN Head</p><p>We use XLM-R, a multilingual model, along with the monolingual model in the ensemble as we found that some of the text in the training set has transliterated Hindi along with some Devanagari text. We extracted textual features such as distribution of '?', '!', capital letters etc.<note place="foot" n="5">https://github.com/stopwords-iso/stopwords-mr</note></p><p>We also use the percentage of profane words and sentiment of the text as a feature. We use a list of profane words curated from various sources such as words/cuss<ref type="foot" target="#foot_1">6</ref>, zacanger/profane-words<ref type="foot" target="#foot_2">7</ref>, t-davidson/lexicons.<ref type="foot" target="#foot_3">8</ref> For sentiment analysis we use the TweetEval <ref type="bibr" target="#b24">[25]</ref> model and use its softmax output as a feature to our models.</p><p>Inspired by Kim <ref type="bibr" target="#b25">[26]</ref> we pass the embedding (concatenated last 4 hidden layers) to a CNN and max-pool convolution layers of various widths to a fully connected layer of size 128 with dropout. We concatenate this 128 dimensional vector with our feature vector. We pass this output onto a dense output layer with softmax activation and cross entropy loss as shown in Figure <ref type="figure" target="#fig_1">1</ref>.</p><p>Along with the previous models, we fine-tune BERTweet, a pre-trained language model for English tweets. BERTweet has the same architecture as BERT and is trained using the pre-training procedure of RoBERTa, but it is trained solely on tweets, thus making it a viable alternative and suitable for our task. 
This model has shown state-of-the-art results on tasks based on tweets <ref type="bibr" target="#b23">[24]</ref>. We use the encoder architecture and pass the pooled output through a linear layer for the classification which uses softmax activation and cross-entropy loss as shown in Figure <ref type="figure" target="#fig_1">1</ref>.</p><p>We also train the models on the previous years' datasets but notice that this does not increase the performance of the models but actually degrades the performance in Subtask 1-B due to the skewed distribution of classes. Transliteration of emojis didn't improve the performance. The class imbalance in Subtask 1-B degraded the performance of our models; hence we tried to improve upon it by using a weighted loss function, but we notice that this decreases the performance and that the domain specific distribution is actually helping the models. We also perform K-Fold Validation and use early stopping to avoid over-fitting. We average the probabilities of each class across folds and the two models in our ensemble.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.2.">Hindi &amp; Marathi Classifier</head><p>For both the Hindi and Marathi languages, the architecture that performed the best utilized the XLM-R transformer model. This model was able to capture the code-mixed and multilingual nature of the tweets dataset. To amplify the results, we leveraged intermediary representations of the language model as well as textual features that were extracted from the tweets. In particular, we utilized the Multilingual MiniLM language model for fine-tuning on Hindi Subtask 1-B. We observed that MiniLM with Focal Loss instead of Cross Entropy Loss performed better than other baselines in the imbalanced multi-class setting of Hindi Subtask 1-B. Focal Loss compensates for class imbalance with a factor that increases the network's sensitivity towards mis-classified samples.</p><p>Inspired by Mozafari et al. <ref type="bibr" target="#b17">[18]</ref> we use the pre-trained representations of the text from the 12 hidden layers of the XLM-R model (each of 768 dimensions) and then apply a CNN layer with a kernel size of 3. The output is then passed through a soft-max following which the cross-entropy loss is computed whilst training. This model architecture is represented in Figure <ref type="figure" target="#fig_4">2</ref>. Tuning hyperparameters such as optimizers, loss functions and dropout layers, we experiment with different options. For the optimizers we try Adadelta and Adam optimizers with Adam working out better. Amongst all loss functions, the Cross Entropy Loss performed the best. As for the dropout layers we explore dropouts in the range 0.1-0.5 and use 0.5 as the final dropout for the model architecture.</p><p>We further augment the model features with two kinds of textual features - fraction of profane words and sentiment of the tweet. 
Due to lack of resources for Marathi we catalogue <ref type="foot" target="#foot_4">9</ref> a list of profane words in Marathi and use this to find the fraction of profane words in a tweet. For Hindi, we curate a list of profane words by collating and appending to existing lists<ref type="foot" target="#foot_5">10</ref>, and use this to score each tweet. As for the sentiment of the tweet, we incorporated off-the-shelf HuggingFace models to obtain the positive, negative and neutral scores for a tweet<ref type="foot" target="#foot_6">11</ref><ref type="foot" target="#foot_7">12</ref>. Although the textual features improved the performance for Hindi only by a small margin, for Marathi, manually extracted textual features helped in achieving a significant boost.</p><p>For the Marathi Subtask 1, we experimented with a voting ensemble of the XLM-Roberta with CNN Head using the following features:</p><p>• Word Embedding + Fraction of Profane Words + Sentiment Polarity • Word Embedding + Sentiment Polarity • Word Embedding</p><p>However, we noticed that the base model with the embedding and the textual features performed better on the leaderboard.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Subtask 2: Identification of Conversational Hate-Speech in Code-Mixed Languages (ICHCL)</head><p>The tweets for Subtask 2 are code mixed. While the Transformer based encoder models have performed well on various monolingual NLU tasks, their performance does not reach the same level on code mixed sentences. Multilingual transformer based models have been applied for various code mixed NLU tasks, and have performed better than monolingual transformer based models <ref type="bibr" target="#b26">[27]</ref>. For this task, we use XLM-RoBERTa <ref type="bibr" target="#b15">[16]</ref>. To capture the context and the tweet itself, we modify the input in the following manner, where [CLS] and [SEP] are part of the vocabulary of the model, and are used to classify and take multiple sentences as input, respectively.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>[CLS] &lt;Tweet text to be classified&gt; [SEP] &lt;context of parent tweet&gt; [SEP]</head><p>Here, &lt;Tweet text to be classified&gt; is the text of the tweet/comment/reply that is being classified, while &lt;context of parent tweet&gt; is either just the parent tweet or a concatenation of the parent tweet and comment, depending on whether the text to be classified is a tweet or a comment or a reply. While classifying a standalone tweet, the context is left empty. The Hindi corpus used to train XLM-Roberta is in Devanagari script, while there is only a small portion of the corpus which is in Romanised form. With the hypothesis that the performance of the model will improve if the Hindi tokens are in Devanagari script, we used the CSNLI tool <ref type="foot" target="#foot_8">13</ref> to convert the Romanised tokens to Devanagari script. However, this normalisation only had a marginal impact on the final performance of the model. We used Huggingface's Trainer API to train the XLM-R model, and the hyperparameters were chosen using the hyperparameter search functionality offered by the Trainer API.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 2</head><p>Performance scores for each of the six subtasks in terms of their test accuracy percentage and Macro F1 scores. All the architectures that were experimented with and tested are tabulated here. We can observe from the results that XLM-R combined with CNN classifier head works best across the languages of Subtask 1, while for Subtask 2, XLM-R with normalised input text performs the best in our experiments. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Language</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Experiments</head><p>We used the Huggingface Transformers <ref type="bibr" target="#b27">[28]</ref> library for implementing the classifiers. For hyperparameter tuning we use the Optuna framework<ref type="foot" target="#foot_9">14</ref>. Exploring multiple architectures simultaneously, we also tried ensembling an odd number of models following a majority rule based selection. For the English Subtask 1 we also did ensembling with averaged softmax probabilities. However, the increase in complexity of the classification pipeline did not necessarily improve performance scores, considering the size and the distribution of the dataset for Hindi and Marathi, but helped in English. Table <ref type="table">2</ref> captures the accuracies and F1 scores (corresponding to submissions made on the leaderboard) of all our models for each of the subtasks.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusion</head><p>In this paper, we presented our approaches for Hate Speech detection on Indian Languages and code mix between Hindi-English using multilingual transformer based encoder models. Although, in this work we have employed different models to address individual language specific subtasks, a multi-task single model based approach, which performs well across all the language pairs, would be an interesting challenge, which we wish to explore as a future work. In addition to this, as part of future work, we would like to improve the performance by carrying out an additional step of domain adaptive pre-training of the encoder models, and an efficient ensemble of multilingual encoder models.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head></head><label></label><figDesc>(a) Using BerTweet model with a MLP classifier head. (b) Combining CNN features over XLM-R output and manually generated feature vectors.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Architecture and pipeline for the models used for the downstream task of hate detection and classification for the English language for Subtask 1.</figDesc><graphic coords="4,122.65,84.19,140.00,194.85" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>4. 1 .</head><label>1</label><figDesc>Subtask 1: Identifying Hate, offensive and profane content from the post 4.1.1</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head></head><label></label><figDesc>(a) The base architecture for Hindi &amp; Marathi languages for Subtask 1 using XLM-R with CNN augmented with textual features vector followed by a softmax layer. (b) Multilingual MiniLM architecture adopted to overcome class imbalance while characterizing hate for the Hindi Subtask 1-B.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Architecture and pipeline for the models used for the downstream task of hate detection and classification for the Hindi &amp; Marathi language for Subtask 1.</figDesc><graphic coords="6,103.40,84.42,176.00,191.20" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_5"><head></head><label></label><figDesc>(a) Model pipeline for hate detection in conversational threads for Subtask 2. (b) Hierarchy of a conversation thread and its associated comments.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Model Pipeline and tweet conversation thread example for Subtask 2.</figDesc><graphic coords="7,303.86,129.80,200.02,150.01" type="bitmap" /></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_0">https://github.com/Adi2K/Precog-HASOC-2021</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_1">https://github.com/words/cuss</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="7" xml:id="foot_2">https://github.com/zacanger/profane-words</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_3">https://github.com/t-davidson/hate-speech-and-offensive-language/tree/master/lexicons</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="9" xml:id="foot_4">https://github.com/Adi2K/MarathiSwear</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="10" xml:id="foot_5">https://github.com/neerajvashistha/online-hate-speech-recog/blob/master/data/hi/Hinglish-Offensive-Text-Classification/Hinglish_Profanity_List.csv</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="11" xml:id="foot_6">https://huggingface.co/l3cube-pune/MarathiSentiment</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="12" xml:id="foot_7">https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="13" xml:id="foot_8">https://github.com/irshadbhat/csnli</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="14" xml:id="foot_9">https://optuna.org/</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>We would like to thank the organisers of HASOC'21 Shared task for addressing a crucial problem of hate speech in Indian languages by releasing data resources, and for the smooth conduct of the competition. We would also like to specially thank all members of our research lab, PreCog, for the constructive suggestions during the whole process.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Spread of hate speech in online social media</title>
		<author>
			<persName><forename type="first">B</forename><surname>Mathew</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Dutt</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Goyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Mukherjee</surname></persName>
		</author>
		<idno type="DOI">10.1145/3292522.3326034</idno>
		<idno>doi:10.1145/3292522.3326034</idno>
		<ptr target="https://doi.org/10.1145/3292522.3326034" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 10th ACM Conference on Web Science, WebSci &apos;19</title>
				<meeting>the 10th ACM Conference on Web Science, WebSci &apos;19<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="173" to="182" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Analyzing the targets of hate in online social media</title>
		<author>
			<persName><forename type="first">L</forename><surname>Silva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Mondal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Correa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Benevenuto</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><surname>Weber</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Tenth international AAAI conference on web and social media</title>
				<imprint>
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Hate speech detection and racial bias mitigation in social media based on bert model</title>
		<author>
			<persName><forename type="first">M</forename><surname>Mozafari</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Farahbakhsh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Crespi</surname></persName>
		</author>
		<idno type="DOI">10.1371/journal.pone.0237861</idno>
		<ptr target="https://doi.org/10.1371/journal.pone.0237861" />
	</analytic>
	<monogr>
		<title level="j">PLOS ONE</title>
		<imprint>
			<biblScope unit="volume">15</biblScope>
			<biblScope unit="page" from="1" to="26" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Vulnerable community identification using hate speech detection on social media</title>
		<author>
			<persName><forename type="first">Z</forename><surname>Mossie</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J.-H</forename><surname>Wang</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.ipm.2019.102087</idno>
		<ptr target="https://doi.org/10.1016/j.ipm.2019.102087" />
	</analytic>
	<monogr>
		<title level="j">Information Processing &amp; Management</title>
		<imprint>
			<biblScope unit="volume">57</biblScope>
			<biblScope unit="page">102087</biblScope>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Un-compromised credibility: Social media based multi-class hate speech classification for text</title>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">A</forename><surname>Qureshi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Sabih</surname></persName>
		</author>
		<idno type="DOI">10.1109/ACCESS.2021.3101977</idno>
	</analytic>
	<monogr>
		<title level="j">IEEE Access</title>
		<imprint>
			<biblScope unit="volume">9</biblScope>
			<biblScope unit="page" from="109465" to="109477" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Prevalence and psychological effects of hateful speech in online college communities</title>
		<author>
			<persName><forename type="first">K</forename><surname>Saha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Chandrasekharan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>De Choudhury</surname></persName>
		</author>
		<idno type="DOI">10.1145/3292522.3326032</idno>
		<idno>doi:10.1145/3292522.3326032</idno>
		<ptr target="https://doi.org/10.1145/3292522.3326032" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 10th ACM Conference on Web Science, WebSci &apos;19</title>
				<meeting>the 10th ACM Conference on Web Science, WebSci &apos;19<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="255" to="264" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">An evaluation of multilingual offensive language identification methods for the languages of india</title>
		<author>
			<persName><forename type="first">T</forename><surname>Ranasinghe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zampieri</surname></persName>
		</author>
		<idno type="DOI">10.3390/info12080306</idno>
		<ptr target="https://www.mdpi.com/2078-2489/12/8/306" />
	</analytic>
	<monogr>
		<title level="j">Information</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Overview of the HASOC Subtrack at FIRE 2021: Hate Speech and Offensive Content Identification in English and Indo-Aryan Languages and Conversational Hate Speech</title>
		<author>
			<persName><forename type="first">S</forename><surname>Modha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Mandl</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">K</forename><surname>Shahi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Madhu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Satapara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Ranasinghe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zampieri</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">FIRE 2021: Forum for Information Retrieval Evaluation, Virtual Event</title>
				<imprint>
			<publisher>ACM</publisher>
			<date type="published" from="2021-12-13" to="2021-12-17">13th-17th December 2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Challenges of hate speech detection in social media</title>
		<author>
			<persName><forename type="first">G</forename><surname>Kovács</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Alonso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Saini</surname></persName>
		</author>
		<idno type="DOI">10.1007/s42979-021-00457-3</idno>
		<ptr target="https://doi.org/10.1007/s42979-021-00457-3" />
	</analytic>
	<monogr>
		<title level="j">SN Computer Science</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="page">95</biblScope>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Would your tweet invoke hate on the fly? forecasting hate intensity of reply threads on twitter</title>
		<author>
			<persName><forename type="first">S</forename><surname>Dahiya</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sharma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Sahnan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Goel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Chouzenoux</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Elvira</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Majumdar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Bandhakavi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Chakraborty</surname></persName>
		</author>
		<idno type="DOI">10.1145/3447548.3467150</idno>
		<idno>doi:10.1145/3447548.3467150</idno>
		<ptr target="https://doi.org/10.1145/3447548.3467150" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining, KDD &apos;21</title>
				<meeting>the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining, KDD &apos;21<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="2732" to="2742" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Hate speech detection in hindi-english code-mixed social media text</title>
		<author>
			<persName><forename type="first">T</forename><forename type="middle">Y</forename><surname>Santosh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">V</forename><surname>Aravind</surname></persName>
		</author>
		<idno type="DOI">10.1145/3297001.3297048</idno>
		<ptr target="https://doi.org/10.1145/3297001.3297048" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the ACM India Joint International Conference on Data Science and Management of Data, CoDS-COMAD &apos;19</title>
				<meeting>the ACM India Joint International Conference on Data Science and Management of Data, CoDS-COMAD &apos;19<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2019">2019</date>
			<biblScope unit="page" from="310" to="313" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">A comparative study of different state-of-the-art hate speech detection methods in Hindi-English code-mixed data</title>
		<author>
			<persName><forename type="first">P</forename><surname>Rani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Suryawanshi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Goswami</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">R</forename><surname>Chakravarthi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Fransen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">P</forename><surname>McCrae</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2020.trac-1.7" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying, European Language Resources Association (ELRA)</title>
				<meeting>the Second Workshop on Trolling, Aggression and Cyberbullying, European Language Resources Association (ELRA)<address><addrLine>Marseille, France</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="42" to="48" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Machine learning techniques for hate speech classification of twitter data: State-of-the-art, future challenges and research directions</title>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">E</forename><surname>Ayo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Folorunso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">T</forename><surname>Ibharalu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><forename type="middle">A</forename><surname>Osinuga</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.cosrev.2020.100311</idno>
		<ptr target="https://doi.org/10.1016/j.cosrev.2020.100311" />
	</analytic>
	<monogr>
		<title level="j">Computer Science Review</title>
		<imprint>
			<biblScope unit="volume">38</biblScope>
			<biblScope unit="page">100311</biblScope>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Identifying and categorising profane words in hate speech</title>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">L</forename><surname>Teh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C.-B</forename><surname>Cheng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><forename type="middle">M</forename><surname>Chee</surname></persName>
		</author>
		<idno type="DOI">10.1145/3193077.3193078</idno>
		<idno>doi:10.1145/3193077.3193078</idno>
		<ptr target="https://doi.org/10.1145/3193077.3193078" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2nd International Conference on Compute and Data Analysis, ICCDA 2018</title>
				<meeting>the 2nd International Conference on Compute and Data Analysis, ICCDA 2018<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="65" to="69" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">BERT: Pre-training of deep bidirectional transformers for language understanding</title>
		<author>
			<persName><forename type="first">J</forename><surname>Devlin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M.-W</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Toutanova</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/N19-1423</idno>
		<ptr target="https://aclanthology.org/N19-1423" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
		<title level="s">Long and Short Papers</title>
		<meeting>the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies<address><addrLine>Minneapolis, Minnesota</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2019">2019</date>
			<biblScope unit="volume">1</biblScope>
			<biblScope unit="page" from="4171" to="4186" />
		</imprint>
	</monogr>
	<note>Association for Computational Linguistics</note>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Unsupervised cross-lingual representation learning at scale</title>
		<author>
			<persName><forename type="first">A</forename><surname>Conneau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Khandelwal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Goyal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Chaudhary</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Wenzek</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Guzmán</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Grave</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ott</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Zettlemoyer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Stoyanov</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2020.acl-main.747</idno>
		<ptr target="https://aclanthology.org/2020.acl-main.747" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics</title>
				<meeting>the 58th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="8440" to="8451" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Muril: Multilingual representations for indian languages</title>
		<author>
			<persName><forename type="first">S</forename><surname>Khanuja</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Bansal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Mehtani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Khosla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Dey</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Gopalan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">K</forename><surname>Margam</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Aggarwal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">T</forename><surname>Nagipogu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Dave</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Gupta</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">C B</forename><surname>Gali</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Subramanian</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Talukdar</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">arXiv</title>
		<idno type="arXiv">arXiv:2103.10730</idno>
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">A bert-based transfer learning approach for hate speech detection in online social media</title>
		<author>
			<persName><forename type="first">M</forename><surname>Mozafari</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Farahbakhsh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Crespi</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Complex Networks and Their Applications VIII</title>
				<editor>
			<persName><forename type="first">H</forename><surname>Cherifi</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">S</forename><surname>Gaito</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><forename type="middle">F</forename><surname>Mendes</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>Moro</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">L</forename><forename type="middle">M</forename><surname>Rocha</surname></persName>
		</editor>
		<meeting><address><addrLine>Cham</addrLine></address></meeting>
		<imprint>
			<publisher>Springer International Publishing</publisher>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="928" to="940" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Cross-lingual transfer learning for hate speech detection</title>
		<author>
			<persName><forename type="first">I</forename><surname>Bigoulaeva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Hangya</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Fraser</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/2021.ltedi-1.3" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion, Association for Computational Linguistics</title>
				<meeting>the First Workshop on Language Technology for Equality, Diversity and Inclusion, Association for Computational Linguistics<address><addrLine>Kyiv</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2021">2021</date>
			<biblScope unit="page" from="15" to="25" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<monogr>
		<title level="m" type="main">Multilingual offensive language identification for low-resource languages</title>
		<author>
			<persName><forename type="first">T</forename><surname>Ranasinghe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zampieri</surname></persName>
		</author>
		<idno>CoRR abs/2105.05996</idno>
		<ptr target="https://arxiv.org/abs/2105.05996" />
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Overview of the HASOC subtrack at FIRE 2021: Hate Speech and Offensive Content Identification in English and Indo-Aryan Languages</title>
		<author>
			<persName><forename type="first">T</forename><surname>Mandl</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Modha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">K</forename><surname>Shahi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Madhu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Satapara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Majumder</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Schäfer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Ranasinghe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zampieri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Nandini</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">K</forename><surname>Jaiswal</surname></persName>
		</author>
		<ptr target="http://ceur-ws.org/" />
	</analytic>
	<monogr>
		<title level="m">Working Notes of FIRE 2021 -Forum for Information Retrieval Evaluation</title>
				<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Cross-lingual offensive language identification for low resource languages: The case of marathi</title>
		<author>
			<persName><forename type="first">S</forename><surname>Gaikwad</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Ranasinghe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zampieri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">M</forename><surname>Homan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of RANLP</title>
				<meeting>RANLP</meeting>
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">Overview of the HASOC Subtrack at FIRE 2021: Conversational Hate Speech Detection in Code-mixed language</title>
		<author>
			<persName><forename type="first">S</forename><surname>Satapara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Modha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Mandl</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Madhu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Majumder</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Working Notes of FIRE 2021 -Forum for Information Retrieval Evaluation</title>
				<imprint>
			<publisher>CEUR</publisher>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">BERTweet: A pre-trained language model for English Tweets</title>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">Q</forename><surname>Nguyen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Vu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">T</forename><surname>Nguyen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
				<meeting>the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="9" to="14" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">TweetEval: Unified Benchmark and Comparative Evaluation for Tweet Classification</title>
		<author>
			<persName><forename type="first">F</forename><surname>Barbieri</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Camacho-Collados</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Espinosa-Anke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Neves</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of Findings of EMNLP</title>
				<meeting>Findings of EMNLP</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">Convolutional neural networks for sentence classification</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Kim</surname></persName>
		</author>
		<ptr target="https://aclanthology.org/D14-1181" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), Association for Computational Linguistics</title>
				<meeting>the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2014">2014</date>
			<biblScope unit="page" from="1746" to="1751" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<analytic>
		<title level="a" type="main">GLUECoS: An evaluation benchmark for code-switched NLP</title>
		<author>
			<persName><forename type="first">S</forename><surname>Khanuja</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Dandapat</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Srinivasan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sitaram</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Choudhury</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2020.acl-main.329</idno>
		<ptr target="https://aclanthology.org/2020.acl-main.329" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics</title>
				<meeting>the 58th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="3575" to="3585" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b27">
	<analytic>
		<title level="a" type="main">Transformers: State-of-the-art natural language processing</title>
		<author>
			<persName><forename type="first">T</forename><surname>Wolf</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Debut</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Sanh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chaumond</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Delangue</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Moi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Cistac</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Rault</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Louf</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Funtowicz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Davison</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Shleifer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Von Platen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Ma</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Jernite</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Plu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Xu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Le Scao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Gugger</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Drame</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Q</forename><surname>Lhoest</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Rush</surname></persName>
		</author>
		<idno type="DOI">10.18653/v1/2020.emnlp-demos.6</idno>
		<ptr target="https://aclanthology.org/2020.emnlp-demos.6" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, Association for Computational Linguistics</title>
				<meeting>the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, Association for Computational Linguistics</meeting>
		<imprint>
			<date type="published" when="2020">2020</date>
			<biblScope unit="page" from="38" to="45" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
