<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-title-group>
        <journal-title>Forum for Information Retrieval Evaluation, December</journal-title>
      </journal-title-group>
    </journal-meta>
    <article-meta>
      <title-group>
        <article-title>Overview of the HASOC Subtrack at FIRE 2021: Hate Speech and Offensive Content Identification in English and Indo-Aryan Languages</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <string-name>Thomas Mandl</string-name>
          <email>mandl@uni-hildesheim.de</email>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Sandip Modha</string-name>
          <email>sjmodha@gmail.com</email>
          <xref ref-type="aff" rid="aff2">2</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Gautam Kishore Shahi</string-name>
          <email>gautam.shahi@uni-due.de</email>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Hiren Madhu</string-name>
          <email>hirenmadhu16@gmail.com</email>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Shrey Satapara</string-name>
          <email>shreysatapara@gmail.com</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Prasenjit Majumder</string-name>
          <email>p_majumder@daiict.ac.in</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Johannes Schäfer</string-name>
          <email>johannes.schaefer@uni-hildesheim.de</email>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Tharindu Ranasinghe</string-name>
          <xref ref-type="aff" rid="aff10">10</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Marcos Zampieri</string-name>
          <email>marcos.zampieri@rit.edu</email>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Durgesh Nandini</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Amit Kumar Jaiswal</string-name>
          <xref ref-type="aff" rid="aff4">4</xref>
          <xref ref-type="aff" rid="aff6">6</xref>
          <xref ref-type="aff" rid="aff9">9</xref>
        </contrib>
        <contrib contrib-type="editor">
          <string-name>Evaluation, Deep Learning</string-name>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>DA-IICT</institution>
          ,
          <addr-line>Gandhinagar</addr-line>
          ,
          <country country="IN">India</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Indian Institute of Science</institution>
          ,
          <addr-line>Bangalore</addr-line>
          ,
          <country country="IN">India</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>LDRP-ITR</institution>
          ,
          <addr-line>Gandhinagar</addr-line>
          ,
          <country country="IN">India</country>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>Rochester Institute of Technology</institution>
          ,
          <country country="US">USA</country>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>Social Media</institution>
          ,
          <addr-line>Hate Speech, Offensive Language, Multilingual Text Classification, Machine Learning</addr-line>
        </aff>
        <aff id="aff5">
          <label>5</label>
          <institution>University of Bamberg</institution>
          ,
          <country country="DE">Germany</country>
        </aff>
        <aff id="aff6">
          <label>6</label>
          <institution>University of Bedfordshire</institution>
          ,
          <country country="UK">United Kingdom</country>
        </aff>
        <aff id="aff7">
          <label>7</label>
          <institution>University of Duisburg-Essen</institution>
          ,
          <country country="DE">Germany</country>
        </aff>
        <aff id="aff8">
          <label>8</label>
          <institution>University of Hildesheim</institution>
          ,
          <country country="DE">Germany</country>
        </aff>
        <aff id="aff9">
          <label>9</label>
          <institution>University of Leeds</institution>
          ,
          <country country="UK">United Kingdom</country>
        </aff>
        <aff id="aff10">
          <label>10</label>
          <institution>University of Wolverhampton</institution>
          ,
          <country country="UK">United Kingdom</country>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2021</year>
      </pub-date>
      <volume>1</volume>
      <fpage>3</fpage>
      <lpage>17</lpage>
      <abstract>
        <p>The widespread occurrence of offensive content online such as hate speech poses a growing societal problem. AI tools are necessary for supporting the moderation process at online platforms. For the evaluation of these identification tools, continuous experimentation with data sets in different languages is necessary. The HASOC track (Hate Speech and Offensive Content Identification) is dedicated to developing benchmark data for this purpose. This paper presents the HASOC subtrack for English, Hindi, and Marathi. The data set was assembled from Twitter. This subtrack has two sub-tasks. Task A is a binary classification problem (Hate and Not Offensive) offered for all three languages. Task B is a fine-grained classification problem for three classes (HATE) Hate speech, OFFENSIVE and PROFANITY offered for English and Hindi. Overall, 652 runs were submitted by 65 teams. The performance of the best classification algorithms for task A are F1 measures 0.91, 0.78 and 0.83 for Marathi, Hindi and English, respectively. This overview presents the tasks and the data development as well as the detailed results. The systems submitted to the competition applied a variety of technologies. The best performing algorithms were mainly variants of transformer architectures.</p>
      </abstract>
      <kwd-group>
        <kwd>Languages</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>
        There are various types of potentially harmful content in social media such as misinformation
and fake news [
        <xref ref-type="bibr" rid="ref1">1</xref>
        ], aggression [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ], cyber-bullying [3, 4], pejorative language [5], offensive
language [6], online extremism [7], to name a few. The automatic identification of problematic
content has been receiving significant attention from the AI and NLP communities. In particular,
the identification of offensive content, most notably hate speech, has been a growing research
area. Within this broad area, various related phenomena have been addressed in isolation such
as cyber-bullying, misogyny, aggression, and abuse [8, 9, 10] while some recent work has focused
on modeling multiple types of offensive content at once [11, 12].
      </p>
      <p>While research in this area has been gaining momentum [13], there is increasing evidence
that social media platforms still struggle to keep up with the demand for technology, particularly
for languages other than English [14]. For example, a recent article pointed out that Facebook
does not have technology for identifying hate speech in the 22 official languages of India, its
biggest market worldwide.1</p>
      <p>
        To further contribute to the research in this field, the HASOC 2021 competition contributes
with empirically-driven research aiming to find the best methods for the identification of
offensive content in social media. In its third edition, HASOC 2021 features re-runs of English
and Hindi tasks allowing for better comparison with the results from the editions HASOC 2019
[
        <xref ref-type="bibr" rid="ref3">15</xref>
        ] and HASOC 2020 [
        <xref ref-type="bibr" rid="ref4">16</xref>
        ]. Marathi, an Indo-Aryan language similar to Hindi spoken by over 80
million people in India, was added as a new language in HASOC 2021. A Subtask-2 including
conversational hate speech is described in an additional overview paper [
        <xref ref-type="bibr" rid="ref5">17</xref>
        ].
      </p>
    </sec>
    <sec id="sec-2">
      <title>2. Related Work</title>
      <p>This section briefly reviews related research on hate speech identification and data sets created
with this goal in mind.</p>
      <p>
        Current Benchmarks Recent shared task competitions organised such as TRAC [
        <xref ref-type="bibr" rid="ref2">2</xref>
        ], HASOC
[
        <xref ref-type="bibr" rid="ref6">18</xref>
        ] and OffensEval [
        <xref ref-type="bibr" rid="ref7">19</xref>
        ] have presented multiple datasets for hate speech and offensive content
identification. While a clear majority of these competitions present English data, several recent
shared tasks have created new datasets for various languages such as Greek [
        <xref ref-type="bibr" rid="ref8">20</xref>
        ], Danish
[
        <xref ref-type="bibr" rid="ref9">21</xref>
        ], Mexican Spanish [
        <xref ref-type="bibr" rid="ref10">22</xref>
        ], and Turkish [
        <xref ref-type="bibr" rid="ref11">23</xref>
        ]. These data sets have influenced the creation of
machine learning models to automatically detect offensive content, ranging from SVM models
[
        <xref ref-type="bibr" rid="ref12">24</xref>
        ] with traditional features to state-of-the-art transformer models [25]. As most of these
models typically require training data for each language, it is important to have training data
for various languages. Furthermore, one data set per language is not sufficient because the
topics of hate speech could change, the potential bias of a data set cannot be easily revealed,
and the concept cannot be clearly defined but has a subjective component.
      </p>
      <p>These data sets can be categorised into two main categories. Data sets such as Offensive
Language Detection in Spanish Variants (MeOffendEs@IberLEF 2021) [26] and DEtection of
TOXicity in comments In Spanish (DETOXIS) [27] focus on general concepts of offensive content
1https://www.nytimes.com/2021/10/23/technology/facebook-india-misinformation.html
while other data sets are dedicated to more specific topics than general offensive content. A
recent data set for Russian which models hate against ethnic groups as a multi-class problem
[28] and Guest et al. [29] which has annotated misogyny as a multi-class problem are two recent
data sets that focus on specific topics in offensive content identification.</p>
      <p>
        Annotation for Hate Speech The key activity in data set creation is annotation. Human
annotators need to decide whether the texts presented to them belong to one of the classes
relevant to the task. This process can be organised in different ways. There is no commonly
agreed best practice. Some researchers employ a small number of experts [29] or non-experts
[
        <xref ref-type="bibr" rid="ref3">15</xref>
        ] while others rely on crowd workers [30]. There is a high level of subjectivity associated
with the labelling and the class assignment. This can be more serious in cases of systematic bias
due to different knowledge levels about issues in society or even about language variants [31].
Also demographic features may lead to bias [32]. Sometimes users of data collections consider
some tweets as erroneously labelled. However, it needs to be taken into consideration that
the data providers need to follow a consistent protocol and deviations in the opinions about
individual tweets are natural. These cases of different opinions and individual standards form
part of any data set because typically, more than one person needs to work on the annotation.
      </p>
      <p>The typical method for measuring annotation quality is that some items are annotated at
least twice, and metrics for inter-rater agreement measures the agreement. In cases of low
agreement, it is unclear whether the reason behind this is a lack of common understanding
between the annotators or the collection contains many dubious cases. One study showed that
the agreement is substantially lower than for clear cases [33]. Before starting the annotation, it
is not clear how large the portion of dubious cases is. So, even the inter-rater agreement cannot
be a guarantee that the annotation is very good.</p>
      <p>Reliability of Data Sets Hate speech detection systems are not created only for research but
also for real-world applications. It is crucial not just to measure the quality of the classification
for one data set but also to analyse how well a system can generalise and be transferred to other
data sets. This would be an indicator for a high level of generalisability in realistic scenarios.</p>
      <p>
        Substantial experiments by Fortuna et al. [34] showed that training with one data set and
testing with another one can decrease the performance by over 30%. Many potential reasons can
be seen as obstacles for the generalisability [
        <xref ref-type="bibr" rid="ref13 ref14 ref15 ref16">35, 36, 37, 38</xref>
        ] such as dataset size and annotation
quality. However, little is known about their effects. Consequently, the creation of further hate
speech data sets is necessary not only for measuring the performance of classifiers but also
for the analysis of data sets, the creation processes, and measuring the reliability with new
methods.
      </p>
    </sec>
    <sec id="sec-3">
      <title>3. HASOC Task Overview and Data Set</title>
      <p>The HASOC 2021 dataset is another contribution to the growing body of resources for the
analysis of Hate Speech classification. In the following sections, the tasks and the creation
process of the data set are described.</p>
      <sec id="sec-3-1">
        <title>3.1. Task Definition</title>
        <p>This task focuses on Hate speech and Offensive language identification for English, Hindi, and
Marathi. Sub-task A is a coarse-grained binary classification in which participating systems are
required to classify tweets into two classes, namely: Hate or Offensive (HOF) vs Non-Hate and
Non-Offensive (NOT).</p>
        <p>• HOF - Hate and Offensive : This post contains hate, offensive or profane content.
• NOT - Non Hate-Offensive : This post does not contain any Hate Speech, profanity or
offensive content. This post contains normal content, statements or anything else. If the
utterances are considered to be “normal” and not offending to anyone, they should not be
labelled as this could be part of youth language or other language registers.
3.1.1. Sub-task B: Identifying Hate, profane and offensive posts (fine-grained)
The second sub-task is a fine-grained classification task offered for English and Hindi.
Hate speech and offensive posts from the sub-task A need to be further classified into the following
three categories:
• HATE - Hate speech: Posts under this class contain Hate speech content. Ascribing
negative attributes or deficiencies to groups of individuals because they are members of a
group (e.g. “all poor people are stupid”). These posts include hateful comments toward
groups because of race, political opinion, sexual orientation, gender, social status, health
condition or similar.
• OFFN - Offensive : Posts under this class contain offensive content. Degrading,
dehumanizing or insulting an individual.
• PRFN - Profane: These posts contain profane words. Unacceptable language in the
absence of insults and abuse. This typically concerns the usage of obscenity, swearwords
(Fuck etc.) and cursing (Hell! Damn! etc.).</p>
      </sec>
      <sec id="sec-3-2">
        <title>3.2. Data Set Assembly</title>
        <p>
          The sampling of the data set was planned during the time when India was facing the second
and extremely hard COVID-19 wave. Therefore, during the sampling process, major topics
in social media are highly influenced by COVID-19, and these topics are frequent in the data
set [
          <xref ref-type="bibr" rid="ref17 ref18 ref19">39, 40, 41</xref>
          ]. In addition to this, tweets were also sampled about topics related to the brutal
post-poll violence in the Indian state West Bengal. Table 1 lists the topics and trending hashtags
which were used during the sampling period.
        </p>
        <p>
          To obtain potentially hateful tweets from the very large corpus of tweets, we have trained a
weak classifier based on SVM model with N-gram feature on the HASOC 2019 [
          <xref ref-type="bibr" rid="ref20">42</xref>
          ] and 2020
[
          <xref ref-type="bibr" rid="ref4">16</xref>
          ] data sets. The purpose of this was to create a weak binary classifier that gives an F1-score
around 0.5. We used this classifier to predict labels on the downloaded tweet corpus. We
randomly selected tweets classified as HOF (hateful/profane/offensive) by the weak classifier.
We randomly added 5% of the tweets which were not rated as belonging to the class HOF by
the classifier. The main rationale behind this merging process is to ensure that the final data set
Trending Hashtags
        </p>
        <p>Description of Topics
#ResignModi
#ModiKaVaccineJumla
#Murderer_Modi
#IndiaCovidCrisis
#TMCTerror
#BengalBurning
#ChineseWave
#chinesevirus
#communistvirus
#covidvaccine
#NoVaccinePassports
#chinavirus
#wuhanvirus
#islamophobia
#JusticeForShahabuddin</p>
        <p>Resignation of PM Modi over COVID-19 crisis in India
Controversy due to shortage of COVID-19 Vaccine
Death due to shortage of Oxygen attributed to Modi
Brutal second COVID-19 wave in India
West bengal Post-poll violence.</p>
        <p>West Bengal Post-poll violence.</p>
        <p>Anger on China
Racist tweets on Chinese
Hashtags trend by right-wing group
COVID-19 Vaccine
vaccine passport
Racist tweets on Chinese
COVID-19 Origin
Tweets related to hatred against Islam</p>
        <p>Death of Controversial Indian politician in India
contains a balanced distribution of hateful and non-hateful tweets. We downloaded additional
tweets using profane keywords to create an even more balanced data set. Table 2 lists examples
for different classes from the data set. The size of data sets for training and testing are shown in
Table 3 and Table 4.</p>
        <p>The tweets were extracted from Twitter using a targeted sampling approach. All tweets were
annotated by at least two annotators. Any conflict between the annotators was resolved by a
third annotator. The interrater agreement in subtask 1A is 69% and 72 % for English and Hindi,
respectively. For subtask 1B, the agreement for English is 55% and 68% for Hindi.</p>
        <p>
          The data set for Marathi is based on recently released MOLD dataset [
          <xref ref-type="bibr" rid="ref21">43</xref>
          ]. MOLD contains
data collected from Twitter. Gaikwad et al. [
          <xref ref-type="bibr" rid="ref21">43</xref>
          ] used 22 common curse words in Marathi
together with search phrases related to politics, entertainment, and sports along with the
hashtag #Marathi. With that, Gaikwad et al. [
          <xref ref-type="bibr" rid="ref21">43</xref>
          ] have collected a total 2,547 tweets that were
annotated by six volunteer annotators who are native speakers of Marathi. After removing
non-Marathi tweets, the final version of MOLD contains 2,499 annotated tweets randomly split
75%, 25% into training and testing sets, respectively. Only the sub-task A was available for
Marathi.
        </p>
      </sec>
    </sec>
    <sec id="sec-4">
      <title>4. Participation and Evaluation</title>
      <p>This section details the statistics about the participation in HASOC 2021 by the different teams
from all over the world. HASOC 2021 is the third edition of the HASOC at the Forum for
Information Retrieval (FIRE) 2021. HASOC started in 2019. This year, HASOC received a record
number of participants. A total of 102 teams registered for the participation and 65 teams have
submitted 652 runs for all the subtasks. Table 5 summarizes the statistics about the participation.
yeah when she’s finally done w you you wanna pop back into
her life fuck of
HOF
#ModiKaVaccineJumla Mr. Modi, where is your ”DeshBhakt” HOF
BJP workers now??? Do you feel COVID is attacking only the
anti-nationals or anti-BJPs ???? Shame a Curse On!!!!
@30iPpgStmILw0SI @ChinaDaily #ChineseVirus #WuhanVirus
is the #correct name for the #pandemic . #Shameless
@manoramaonline Shame on people who are still supporting
her... including Manorama. keeping MUM #ArrestMamata
#BengalBurning #BengalViolence https://t.co/o7lXp6nYZW
@timotheelvr BITCH GET OUT OF HERE WE ALL KNOW
SIALL IS REAL
I am booked in to get my first dose of the #Covidvaccine and
truth be told I am a bit nervous | First Dog on the Moon
https://t.co/u7r8ThfOLW
NOT
HOF
HOF
NOT
PRFN
OFFN
NONE
HATE
PRFN
NONE
Class
Sum</p>
      <p>English</p>
      <p>Marathi</p>
      <p>Hindi
Unlike previously, this year we decided to develop our own submission platform2 rather than
using a third party service. We also provided a leaderboard facility to all participants and the
community. The HASOC 2021 leaderboard can be accessed on our Github site3.</p>
    </sec>
    <sec id="sec-5">
      <title>5. Results</title>
      <p>This section presents the details about the results of the runs by the all participating teams who
also submitted a paper describing their system.</p>
      <p>
        Figure 1 presents histograms of the performances of all the teams. Each bin in the histogram
2https://hasocfire.github.io/submission/index.html
3https://hasocfire.github.io/submission/leaderboard.html
Sum
depicts a range of 0.01 Macro F1 score. It provides an overview over the distribution of the
results.
5.1. Hindi
The best submission for Task A was achieved with a fine-tuned Multilingual-BERT with a
classifier layer added at the final phase. The team trained on the HASOC Hindi data set for 20
epochs. With this fine-tuned Multilingual-BERT, the team [
        <xref ref-type="bibr" rid="ref23">45</xref>
        ] was able to achieve Macro F1
score of 0.7797.
      </p>
      <p>
        However, the second team was just 0.0049 points behind this best submission. Apart from
fine-tuning an XLM-R transformer, the authors computed vector representations for emojis
using the system Emoji2Vec and sentence embeddings for hashtags. These three resulting
representations were concatenated before classification. The team was able to achieve the best
results for Task B with the same approach [
        <xref ref-type="bibr" rid="ref27">49</xref>
        ]. This shows that simply ignoring emojis and
hashtags in social media analysis might not always be the adequate approach.
      </p>
      <p>The second team in task B performed just 0.0017 points lower than this best team. This team
1
2
3
4
5
6
7
8
9
10
11
12</p>
      <p>
        NeuralSpace [
        <xref ref-type="bibr" rid="ref27">49</xref>
        ]
SATLab [
        <xref ref-type="bibr" rid="ref30">52</xref>
        ]
hate-busters
NLP-CIC
KuiYongyi [
        <xref ref-type="bibr" rid="ref29">51</xref>
        ]
UMUTeam [66]
IRLab@IITBHU [
        <xref ref-type="bibr" rid="ref38">60</xref>
        ]
PreCog IIIT Hyderabad [
        <xref ref-type="bibr" rid="ref34">56</xref>
        ]
SSN_NLP_MLRG [
        <xref ref-type="bibr" rid="ref33">55</xref>
        ]
MUM [
        <xref ref-type="bibr" rid="ref34">56</xref>
        ]
Data Pirates
      </p>
      <p>Hasnuhana
fine-tuned a Multilingual-BERT transformer with a softmax loss function unlike the two teams
previously mentioned which both applied a binary cross-entropy loss.</p>
      <p>Tables 6 and 7 clearly indicate that the top six for Task A and the top five teams for Task B
have achieved very close Macro F1s with less than 0.001 difference. For Task A, the mean F1
score achieved by all the best submissions is 0.7436. The standard deviation of the submissions
is 0.0289. However, for the top 10 submissions, the standard deviation is only 0.0058, which is
approximately only 1/5th of the standard deviation of all teams. For task B, the mean F1 score
achieved by all the best submissions is 0.4493 which shows that the fine-grained classification
remains difficult. We need to consider that the interrater agreement is also low for this task. In
this case, the standard deviation between systems is 0.1114, while it is 0.0241 for the best 10
submissions. The standard deviation of all teams is approximately 4.5 times higher than the top
10 teams’ standard deviation.</p>
      <sec id="sec-5-1">
        <title>5.2. English</title>
        <p>
          The best submission for Task A used a GCN based approach in which the team defined tweets
and words as nodes. A word node is connected with all the tweet nodes to which it belongs and
a word node is connected to other word nodes that fall into the sliding window of that node
across all tweets. Furthermore, the authors used TF-IDF weights as node weights. They were
able to achieve 0.8215 as Macro F1 score [
          <xref ref-type="bibr" rid="ref39">61</xref>
          ]. The second team used a soft-voting ensemble
of four different transformer models jointly fine-tuned on the original training set and the
HatebaseTwitter data. Using this external resource, the team was able to achieve an F1 score
which is only 0.0016 lower than the first team. However, the same team ranked first in Task B
while using the same approach as for Task A and yielded a Macro F1 of 0.6577 [
          <xref ref-type="bibr" rid="ref32">54</xref>
          ]. The second
team in Task B used BERT, TF-IDF and the similarity score between the two as features and
concatenated them to feed this text representation into a classifier. They achieved a Macro F1
score of 0.6482.
        </p>
        <p>For Task A, the mean F1 score achieved by all the best submissions is 0.7569 while the
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28</p>
        <p>
          NLP-CIC
HUNLP [
          <xref ref-type="bibr" rid="ref39">61</xref>
          ]
neuro-utmn-thales [
          <xref ref-type="bibr" rid="ref32">54</xref>
          ]
HNLP [
          <xref ref-type="bibr" rid="ref28">50</xref>
          ]
Chandigarh_Concordia
KuiYongyi [
          <xref ref-type="bibr" rid="ref29">51</xref>
          ]
t1
UINSUSKA [
          <xref ref-type="bibr" rid="ref37">59</xref>
          ]
TUW-Inf [72]
UMUTeam [66]
HASOC21rub [75]
Super Mario [
          <xref ref-type="bibr" rid="ref23">45</xref>
          ]
Hasnuhana
NeuralSpace [
          <xref ref-type="bibr" rid="ref27">49</xref>
          ]
Sakshi HASOC [
          <xref ref-type="bibr" rid="ref35">57</xref>
          ]
IRLab@IITBHU [
          <xref ref-type="bibr" rid="ref38">60</xref>
          ]
PreCog IIIT Hyderabad [
          <xref ref-type="bibr" rid="ref34">56</xref>
          ]
IMS-SINAI [78]
SSN_NLP_MLRG [
          <xref ref-type="bibr" rid="ref33">55</xref>
          ]
giniUs
biCourage [79]
hate-busters
SATLab [
          <xref ref-type="bibr" rid="ref30">52</xref>
          ]
TAD
Beware Haters [82]
TeamOulu [
          <xref ref-type="bibr" rid="ref31">53</xref>
          ]
Vishesh Gupta [83]
AUST_AI
standard deviation is 0.06255. For the top 10 submissions, the standard deviation is 0.01049
which is approximately 1/6th of the standard deviation of all teams. For Task B, the mean F1 score
achieved by the best submissions is 0.5707 while the standard deviation is 0.0888. For the
best 10 submissions, the standard deviation is 0.0114. The standard deviation of all teams is
approximately 8 times the standard deviation of the top 10 teams.
        </p>
      </sec>
      <sec id="sec-5-2">
        <title>5.3. Marathi</title>
        <p>
          The best submission for this task used a fine-tuned XLM-R Large model with a simple softmax
layer to predict the probabilities of class labels. They performed transfer learning from English
data released for OffensEval 2019 [
          <xref ref-type="bibr" rid="ref7">19</xref>
          ] and Hindi data released for HASOC 2019 [
          <xref ref-type="bibr" rid="ref6">18</xref>
          ] and show
that performing transfer learning from Hindi is better than performing transfer learning from
English. They achieved an F1 score of 0.9144 [84]. Their approach shows the importance of
performing transfer learning from a closely related language.
        </p>
        <p>
          The team in second place applied a fine-tuned LaBSE transformer [86] on the Marathi data set
as well as on the Hindi data set and achieved an F1 score of 0.8808. Their experiments show that
LaBSE transformer [86] outperforms XLM-R in the monolingual settings, but XLM-R performs
better when Hindi and Marathi data are combined [
          <xref ref-type="bibr" rid="ref32">54</xref>
          ].
        </p>
        <p>For task A in Marathi, the mean F1 score achieved by all submissions is 0.8255 while the
standard deviation is 0.0774. Again for the top 10 submissions, the standard deviation is much
lower and lies at 0.0143.</p>
      </sec>
    </sec>
    <sec id="sec-6">
      <title>6. Conclusions and Future Work</title>
      <p>The third edition of HASOC has shown that transformer-based classification techniques are
the state-of-the-art approach for hate speech and offensive content identification online. This
corroborates the findings of recent related competitions such as OffensEval 2020 at SemEval
[87]. The best results obtained by participants of HASOC 2021 in terms of macro F1-score were
0.83 in English, 0.78 in Hindi, 0.91 in Marathi. From Figure 1, we can argue that the results can
be approximated by a negatively skewed distribution.</p>
      <p>In a potential future edition of HASOC, we could encourage participants to use some
time-series based classification model for the classification of tweets [88]. HASOC 2021 offered a
set of tasks for English, Hindi and Marathi. In the upcoming HASOC edition, we intend to
investigate a task for summarization of hateful and normal tweets on long-running debatable
topics [89] such as the Middle-East crisis, the Kashmir problem and religious intolerance.</p>
    </sec>
    <sec id="sec-7">
      <title>Acknowledgments</title>
      <p>We are thankful to Mr. Pavan Pandya and Mr. Harshil Modh for their contribution in developing
the HASOC run submission platform and in the annotation process. We are also thankful to Ms.
Mohana Dave and Mr. Vraj Shah for help in the data set sampling and annotation process. We
thank all reviewers for HASOC 2021 for their work in a short period of time. We also thank Ms.
Ramona Böcker for supporting the paper checking process.
[3] J. Shetty, K. Chaithali, A. M. Shetty, B. Varsha, V. Puthran, Cyber-bullying detection: A
comparative analysis of twitter data, in: Advances in Artificial Intelligence and Data
Engineering, Springer, 2020, pp. 841–855. doi:10.1007/978-981-15-3514-7_62.
[4] W. N. H. W. Ali, M. Mohd, F. Fauzi, Identification of profane words in cyberbullying
incidents within social networks, Journal of Information Science Theory and Practice 9
(2021) 24–34. doi:10.1633/JISTaP.2021.9.1.2.
[5] L. P. Dinu, I.-B. Iordache, A. S. Uban, M. Zampieri, A computational exploration of
pejorative language in social media, in: Findings of the Association for Computational Linguistics:
EMNLP 2021, Association for Computational Linguistics, Punta Cana, Dominican Republic,
2021, pp. 3493–3498. URL: https://aclanthology.org/2021.findings-emnlp.296.
[6] T. Ranasinghe, M. Zampieri, Multilingual offensive language identification with
crosslingual embeddings, in: Proceedings of the 2020 Conference on Empirical Methods in
Natural Language Processing (EMNLP), Association for Computational Linguistics, Online,
2020, pp. 5838–5844. URL: https://aclanthology.org/2020.emnlp-main.470.
doi:10.18653/v1/2020.emnlp-main.470.
[7] S. Aldera, A. Z. Emam, M. Al-Qurishi, M. A. AlRubaian, A. Alothaim, Online
extremism detection in textual content: A systematic literature review, IEEE Access 9 (2021)
42384–42396. URL: https://doi.org/10.1109/ACCESS.2021.3064178.
[8] S. Jaki, T. De Smedt, M. Gwóźdź, R. Panchal, A. Rossa, G. De Pauw, Online hatred of women
in the incels. me forum: Linguistic analysis and automatic detection, Journal of Language
Aggression and Conflict 7 (2019) 240–268. doi:10.1075/jlac.00026.jak.
[9] P. Fortuna, J. Soler Company, L. Wanner, Toxic, hateful, offensive or abusive? what are
we really classifying? an empirical analysis of hate speech datasets, in: Proceedings
of the 12th Language Resources and Evaluation Conference, LREC, Marseille, France,
May 11-16, European Language Resources Association, 2020, pp. 6786–6794. URL: https:
//aclanthology.org/2020.lrec-1.838/.
[10] M. Mladenovic, V. Osmjanski, S. Vujicic Stankovic, Cyber-aggression, cyberbullying, and
cyber-grooming: A survey and research challenges, ACM Computing Surveys 54 (2021)
1:1–1:42. URL: https://doi.org/10.1145/3424246.
[11] M. Zampieri, S. Malmasi, P. Nakov, S. Rosenthal, N. Farra, R. Kumar, Predicting the type
and target of offensive posts in social media, in: Proceedings of the 2019 Conference of
the North American Chapter of the Association for Computational Linguistics: Human
Language Technologies, Volume 1 (Long and Short Papers), Association for Computational
Linguistics, Minneapolis, Minnesota, 2019, pp. 1415–1420. URL: https://aclanthology.org/
N19-1144. doi:10.18653/v1/N19-1144.
[12] S. Rosenthal, P. Atanasova, G. Karadzhov, M. Zampieri, P. Nakov, SOLID: A large-scale
semi-supervised dataset for offensive language identification, in: Findings of the
Association for Computational Linguistics: ACL-IJCNLP 2021, Association for Computational
Linguistics, Online, 2021, pp. 915–928. URL: https://aclanthology.org/2021.findings-acl.80.
doi:10.18653/v1/2021.findings-acl.80.
[13] S. Jaki, S. Steiger (Eds.), Digitale Hate Speech - Interdisziplinäre Perspektiven auf
Erkennung, Beschreibung und Regulation, Springer, Cham, 2022.
[14] S. Modha, P. Majumder, T. Mandl, C. Mandalia, Detecting and visualizing hate speech in
social media: A cyber watchdog for surveillance, Expert Systems and Applications 161
USA, 2019, pp. 70–74. URL: https://aclanthology.org/S19-2009. doi:10.18653/v1/S19-2009.
[25] T. Ranasinghe, M. Zampieri, H. Hettiarachchi, Brums at hasoc 2019: Deep learning models
for multilingual hate speech and offensive language identification, in: FIRE (Working
Notes), CEUR, 2019.
[26] F. M. Plaza-del Arco, M. Casavantes, H. J. Escalante, M. T. Martín-Valdivia, A. Montejo-Ráez,
M. Montes, H. Jarquín-Vásquez, L. Villaseñor-Pineda, et al., Overview of MeOffendEs
at IberLEF 2021: Offensive language detection in Spanish variants, Procesamiento del
Lenguaje Natural 67 (2021) 183–194. URL: http://journal.sepln.org/sepln/ojs/ojs/index.php/
pln/article/view/6388.
[27] J. Gonzalo, M. Montes-y-Gómez, P. Rosso, Iberlef 2021 overview: Natural language
processing for iberian languages, in: Proceedings of the Iberian Languages Evaluation
Forum (IberLEF 2021) co-located with the Conference of the Spanish Society for Natural
Language Processing (SEPLN 2021), XXXVII International Conference of the Spanish
Society for Natural Language Processing., Málaga, Spain, September, 2021, volume 2943
of CEUR Workshop Proceedings, CEUR-WS.org, 2021, pp. 1–15. URL: http://ceur-ws.org/
Vol-2943/Overview_iberLEF_2021.pdf.
[28] E. V. Pronoza, P. Panicheva, O. Koltsova, P. Rosso, Detecting ethnicity-targeted hate speech
in Russian social media texts, Information Processing and Management 58 (2021) 102674.</p>
      <p>URL: https://doi.org/10.1016/j.ipm.2021.102674.
[29] E. Guest, B. Vidgen, A. Mittos, N. Sastry, G. Tyson, H. Z. Margetts, An expert annotated
dataset for the detection of online misogyny, in: Proceedings of the 16th Conference of
the European Chapter of the Association for Computational Linguistics: Main Volume,
EACL 2021, Online, April 19 - 23, 2021, Association for Computational Linguistics, 2021,
pp. 1336–1350. URL: https://aclanthology.org/2021.eacl-main.114/.
[30] J. Pavlopoulos, J. Sorensen, L. Laugier, I. Androutsopoulos, SemEval-2021 task 5: Toxic
spans detection, in: Proceedings of the 15th International Workshop on Semantic
Evaluation (SemEval-2021), Association for Computational Linguistics, Online, 2021. URL:
https://aclanthology.org/2021.semeval-1.6.
[31] M. Sap, D. Card, S. Gabriel, Y. Choi, N. A. Smith, The risk of racial bias in hate speech
detection, in: Proceedings of the 57th Conference of the Association for Computational
Linguistics, ACL, Florence, Italy, July 28- August 2, Volume 1: Long Papers, Association
for Computational Linguistics, 2019, pp. 1668–1678. URL: https://doi.org/10.18653/v1/
p19-1163.
[32] H. A. Kuwatly, M. Wich, G. Groh, Identifying and measuring annotator bias based on
annotators’ demographic characteristics, in: Proceedings of the Fourth Workshop on
Online Abuse and Harms, WOAH Online, November 20, Association for Computational
Linguistics, 2020, pp. 184–190. URL: https://doi.org/10.18653/v1/2020.alw-1.21.
[33] J. Salminen, H. Almerekhi, A. M. Kamel, S. Jung, B. J. Jansen, Online hate ratings vary
by extremes: A statistical analysis, in: Proceedings of the 2019 Conference on Human
Information Interaction and Retrieval, CHIIR, Glasgow, Scotland, UK, March 10-14, ACM,
2019, pp. 213–217. doi:10.1145/3295750.3298954.
[34] P. Fortuna, J. Soler Company, L. Wanner, How well do hate speech, toxicity, abusive
and offensive language classification models generalize across datasets?, Information
Processing and Management 58 (2021) 102524. doi:10.1016/j.ipm.2021.102524.
Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[64] A. Velankar, H. Patil, A. Gore, S. Salunke, R. Joshi, Hate and Offensive Speech Detection
in Hindi and Marathi, in: Forum for Information Retrieval Evaluation (Working Notes)
(FIRE), CEUR-WS.org, 2021.
[65] K. Maity, A. Kumar, S. Saha, Attention Based BERT-FastText model for Hate Speech
and Offensive Content Identification in English and Hindi Languages, in: Forum for
Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[66] C. Caparrós-Laiz, J. Antonio, G. Díaz, R. Valencia-Garcia, Detecting Hate Speech on English
and Indo-Aryan Languages with BERT and Ensemble learning, in: Forum for Information
Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[67] P. Nandi, D. Das, Detection of Hate or Offensive Phrase using Magnified Tf-Idf, in: Forum
for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[68] S. Kannan, J. Mitrović, Hatespeech and Offensive Content Detection in Hindi Language
using C-BiGRU, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE),
CEUR-WS.org, 2021.
[69] R. Rajalakshmi, F. Mattins, S. S, P. Reddy, A. K. M, Hate Speech and Offensive Content
Identification in Hindi and Marathi Language Tweets using Ensemble Techniques, in:
Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[70] I. Jadhav, A. Kanade, V. Waghmare, D. Chaudhari, Hate and Offensive Speech Detection in
Hindi Twitter Corpus, in: Forum for Information Retrieval Evaluation (Working Notes)
(FIRE), CEUR-WS.org, 2021.
[71] S. Hakimov, R. Ewerth, Combining Textual Features for the Detection of Hateful and
Offensive Language, in: Forum for Information Retrieval Evaluation (Working Notes)
(FIRE), CEUR-WS.org, 2021.
[72] K. Gémes, A. Kovács, M. Reichel, G. Recski, Offensive text detection on English Twitter
with deep learning models and rule-based systems, in: Forum for Information Retrieval
Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[73] J. Zeng, L. Xu, ALBERT for Hate Speech and Offensive Content Identification, in: Forum
for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[74] S. Saseendran, S. R, S. V, S. Giri, Classification of Hate Speech and Offensive Content
using an approach based on DistilBERT, in: Forum for Information Retrieval Evaluation
(Working Notes) (FIRE), CEUR-WS.org, 2021.
[75] W. Yu, B. Boenninghof, D. Kolossa, Hybrid Representation Fusion for Twitter Hate Speech
Identification, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE),
CEUR-WS.org, 2021.
[76] S. Sangwan, L. Dey, M. Shakir, Gated Multi-task learning framework for text classification,
in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org,
2021.
[77] Y. Xu, H. Ning, Y. Sun, Hate Speech and Offensive Content Identification Based on
Self-Attention, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE),
CEUR-WS.org, 2021.
[78] F. M. P. del Arco, S. Halat, S. Padó, R. Klinger, Multi-Task Learning with Sentiment,
Emotion, and Target Detection to Recognize Hate Speech and Offensive Language, in:
Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[79] R. Wilkens, D. Ognibene, biCourage: ngram and syntax GCNs for Hate Speech detection,
in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org,
2021.
[80] S. Mohtaj, V. Schmitt, S. Möller, A Feature Extraction based Model for Hate Speech
Identification, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE),
CEUR-WS.org, 2021.
[81] N. P. Motlogelwa, E. Thuma, M. Mudongo, T. Leburu-Dingalo, G. Mosweunyane,
Leveraging Text Generated from Emojis for Hate Speech and Offensive Content Identification, in:
Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[82] D. N, R. Avireddy, A. Ambalavanan, B. R. Selvamani, Hate Speech Detection using LIME
guided Ensemble Method and DistilBERT, in: Forum for Information Retrieval Evaluation
(Working Notes) (FIRE), CEUR-WS.org, 2021.
[83] V. Gupta, R. Kumar, R. Pamula, Hate Speech and Offensive Content Identification in
English Tweets, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE),
CEUR-WS.org, 2021.
[84] M. Nene, K. North, T. Ranasinghe, M. Zampieri, Transformer Models for Offensive
Language Identification in Marathi, in: Forum for Information Retrieval Evaluation
(Working Notes) (FIRE), CEUR-WS.org, 2021.
[85] D. Gajbhiye, S. Deshpande, P. Ghante, A. Kale, D. Chaudhari, Machine Learning Models
for Hate Speech Identification in Marathi Language, in: Forum for Information Retrieval
Evaluation (Working Notes) (FIRE), CEUR-WS.org, 2021.
[86] F. Feng, Y. Yang, D. Cer, N. Arivazhagan, W. Wang, Language-agnostic bert sentence
embedding, CoRR abs/2007.01852 (2020). URL: https://arxiv.org/abs/2007.01852.
[87] M. Zampieri, P. Nakov, S. Rosenthal, P. Atanasova, G. Karadzhov, H. Mubarak, L.
Derczynski, Z. Pitenis, Ç. Çöltekin, Semeval-2020 task 12: Multilingual offensive language
identification in social media (OffensEval 2020), in: Proceedings of the Fourteenth
Workshop on Semantic Evaluation, 2020, pp. 1425–1447.
[88] G. K. Shahi, I. Bilbao, E. Capecci, D. Nandini, M. Choukri, N. Kasabov, Analysis,
classification and marker discovery of gene expression data with evolving spiking neural
networks, in: International Conference on Neural Information Processing, Springer, 2018,
pp. 517–527.
[89] S. Modha, P. Majumder, T. Mandl, R. Singla, Design and analysis of microblog-based
summarization system, Social Network Analysis and Mining 11 (2021) 1–16. URL: https:
//doi.org/10.1007/s13278-021-00830-3.</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          [1]
          <string-name>
            <given-names>P.</given-names>
            <surname>Nakov</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. D. S.</given-names>
            <surname>Martino</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Elsayed</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Barrón-Cedeño</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Míguez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Shaar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Alam</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Haouari</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Hasanain</surname>
          </string-name>
          ,
          <string-name>
            <given-names>W.</given-names>
            <surname>Mansour</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Hamdan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Z. S.</given-names>
            <surname>Ali</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Babulkov</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Nikolov</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. K.</given-names>
            <surname>Shahi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. M.</given-names>
            <surname>Struß</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Mandl</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Kutlu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Y. S.</given-names>
            <surname>Kartal</surname>
          </string-name>
          ,
          <article-title>Overview of the CLEF-2021 CheckThat! lab on detecting check-worthy claims, previously fact-checked claims, and Fake News, in: Experimental IR Meets Multilinguality</article-title>
          , Multimodality, and Interaction - 12th
          <source>International Conference of the CLEF Association, CLEF Virtual Event, September 21-24</source>
          , volume
          <volume>12880</volume>
          of Lecture Notes in Computer Science, Springer,
          <year>2021</year>
          , pp.
          <fpage>264</fpage>
          -
          <lpage>291</lpage>
          . URL: https://doi.org/10.1007/978-3-
          <fpage>030</fpage>
          -85251-1_
          <fpage>19</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          [2]
          <string-name>
            <given-names>R.</given-names>
            <surname>Kumar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A. K.</given-names>
            <surname>Ojha</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Malmasi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Zampieri</surname>
          </string-name>
          ,
          <article-title>Evaluating aggression identification in social media</article-title>
          ,
          <source>in: Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying</source>
          ,
          <source>TRAC@LREC</source>
          <year>2020</year>
          , Marseille, France, May,
          <source>European Language Resources Association (ELRA)</source>
          ,
          <year>2020</year>
          , pp.
          <fpage>1</fpage>
          -
          <lpage>5</lpage>
          . URL: https://aclanthology.org/
          <year>2020</year>
          .trac-
          <volume>1</volume>
          .1/. (
          <year>2020</year>
          )
          <article-title>113725</article-title>
          . URL: https://doi.org/10.1016/j.eswa.
          <year>2020</year>
          .
          <volume>113725</volume>
          .
          doi:10.1016/j.eswa.2020.113725.
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          [15]
          <string-name>
            <given-names>T.</given-names>
            <surname>Mandl</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Modha</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Majumder</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Patel</surname>
          </string-name>
          ,
          <article-title>Overview of the HASOC track at FIRE 2019: Hate Speech and Offensive Content Identification in Indo-European Languages), in: Working Notes of the Annual Meeting of the Forum for Information Retrieval Evaluation, FIRE, CEUR-</article-title>
          <string-name>
            <surname>WS</surname>
          </string-name>
          ,
          <year>2019</year>
          . URL: http://ceur-ws.
          <source>org/</source>
          Vol-
          <volume>2517</volume>
          /
          <fpage>T3</fpage>
          -1.pdf.
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          [16]
          <string-name>
            <given-names>T.</given-names>
            <surname>Mandl</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Modha</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. K.</given-names>
            <surname>Shahi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A. K.</given-names>
            <surname>Jaiswal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Nandini</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Patel</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Majumder</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Schäfer</surname>
          </string-name>
          ,
          <source>Overview of the HASOC track at FIRE</source>
          <year>2020</year>
          :
          <article-title>Hate speech and offensive content identification in Indo-European Languages</article-title>
          , in: Working Notes of FIRE 2020 -
          <article-title>Forum for Information Retrieval Evaluation, Hyderabad</article-title>
          , India,
          <source>December 16-20</source>
          , volume
          <volume>2826</volume>
          , CEUR-WS.org,
          <year>2020</year>
          , pp.
          <fpage>87</fpage>
          -
          <lpage>111</lpage>
          . URL: http://ceur-ws.
          <source>org/</source>
          Vol-
          <volume>2826</volume>
          /
          <fpage>T2</fpage>
          -1.pdf.
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          [17]
          <string-name>
            <given-names>S.</given-names>
            <surname>Satapara</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Modha</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Mandl</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Madhu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Majumder</surname>
          </string-name>
          ,
          <article-title>Overview of the HASOC Subtrack at FIRE 2021: Conversational Hate Speech Detection in Code-mixed language</article-title>
          , in: Working Notes of FIRE 2021 -
          <article-title>Forum for Information Retrieval Evaluation</article-title>
          ,
          <string-name>
            <surname>CEUR</surname>
          </string-name>
          ,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          [18]
          <string-name>
            <given-names>S.</given-names>
            <surname>Modha</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Mandl</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Majumder</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Patel</surname>
          </string-name>
          ,
          <source>Overview of the HASOC track at FIRE</source>
          <year>2019</year>
          :
          <article-title>Hate speech and offensive content identification in Indo-European Languages</article-title>
          , in: Working Notes of FIRE 2019 -
          <article-title>Forum for Information Retrieval Evaluation, Kolkata</article-title>
          , India,
          <source>December 12-15</source>
          , volume
          <volume>2517</volume>
          , CEUR-WS.org,
          <year>2019</year>
          , pp.
          <fpage>167</fpage>
          -
          <lpage>190</lpage>
          . URL: http://ceur-ws.
          <source>org/</source>
          Vol-
          <volume>2517</volume>
          /
          <fpage>T3</fpage>
          -1.pdf.
        </mixed-citation>
      </ref>
      <ref id="ref7">
        <mixed-citation>
          [19]
          <string-name>
            <given-names>M.</given-names>
            <surname>Zampieri</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Malmasi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Nakov</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Rosenthal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Farra</surname>
          </string-name>
          , R. Kumar, SemEval
          <article-title>-2019 task 6: Identifying and categorizing offensive language in social media (OffensEval)</article-title>
          ,
          <source>in: Proceedings of the 13th International Workshop on Semantic Evaluation</source>
          ,
          <article-title>Association for Computational Linguistics</article-title>
          , Minneapolis, Minnesota, USA,
          <year>2019</year>
          , pp.
          <fpage>75</fpage>
          -
          <lpage>86</lpage>
          . URL: https://aclanthology.org/S19-2010.
          <article-title>doi:10.18653/v1/S19-2010</article-title>
          .
        </mixed-citation>
      </ref>
      <ref id="ref8">
        <mixed-citation>
          [20]
          <string-name>
            <given-names>Z.</given-names>
            <surname>Pitenis</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Zampieri</surname>
          </string-name>
          , T. Ranasinghe,
          <article-title>Offensive language identification in Greek</article-title>
          ,
          <source>in: Proceedings of the 12th Language Resources and Evaluation Conference</source>
          , European Language Resources Association, Marseille, France,
          <year>2020</year>
          , pp.
          <fpage>5113</fpage>
          -
          <lpage>5119</lpage>
          . URL: https://aclanthology.org/
          <year>2020</year>
          .lrec-
          <volume>1</volume>
          .
          <fpage>629</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref9">
        <mixed-citation>
          [21]
          <string-name>
            <given-names>G. I.</given-names>
            <surname>Sigurbergsson</surname>
          </string-name>
          , L. Derczynski,
          <article-title>Offensive language and hate speech detection for Danish</article-title>
          ,
          <source>in: Proceedings of the 12th Language Resources and Evaluation Conference</source>
          , European Language Resources Association, Marseille, France,
          <year>2020</year>
          , pp.
          <fpage>3498</fpage>
          -
          <lpage>3508</lpage>
          . URL: https://aclanthology.org/
          <year>2020</year>
          .lrec-
          <volume>1</volume>
          .
          <fpage>430</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref10">
        <mixed-citation>
          [22]
          <string-name>
            <given-names>M. E.</given-names>
            <surname>Aragón</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M. Á.</given-names>
            <surname>Á. Carmona</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Montes-y Gómez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H. J.</given-names>
            <surname>Escalante</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L. V.</given-names>
            <surname>Pineda</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Moctezuma</surname>
          </string-name>
          ,
          <article-title>Overview of MEX-A3T at IberLEF 2019: Authorship and aggressiveness analysis in Mexican Spanish Tweets</article-title>
          .,
          <source>in: Iberian Languages Evaluation Forum (IberLEF) SEPLN</source>
          ,
          <year>2019</year>
          , pp.
          <fpage>478</fpage>
          -
          <lpage>494</lpage>
          . URL: http://ceur-ws.
          <source>org/</source>
          Vol-2421/MEX-A3T_
          <article-title>overview</article-title>
          .pdf.
        </mixed-citation>
      </ref>
      <ref id="ref11">
        <mixed-citation>
          [23]
          <string-name>
            <surname>Ç. Çöltekin</surname>
          </string-name>
          ,
          <article-title>A corpus of Turkish ofensive language on social media</article-title>
          ,
          <source>in: Proceedings of the 12th Language Resources and Evaluation Conference</source>
          , LREC Marseille, France, May
          <volume>11</volume>
          -16, European Language Resources Association,
          <year>2020</year>
          , pp.
          <fpage>6174</fpage>
          -
          <lpage>6184</lpage>
          . URL: https://aclanthology.org/
          <year>2020</year>
          .lrec-
          <volume>1</volume>
          .758/.
        </mixed-citation>
      </ref>
      <ref id="ref12">
        <mixed-citation>
          [24]
          <string-name>
            <given-names>V.</given-names>
            <surname>Indurthi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>B.</given-names>
            <surname>Syed</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Shrivastava</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Chakravartula</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Gupta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Varma</surname>
          </string-name>
          , FERMI at SemEval
          <article-title>-2019 task 5: Using sentence embeddings to identify hate speech against immigrants and women in Twitter</article-title>
          ,
          <source>in: Proceedings of the 13th International Workshop on Semantic Evaluation</source>
          ,
          <article-title>Association for Computational Linguistics</article-title>
          , Minneapolis,
          <source>Minnesota, 1</source>
          <volume>0 2 5 2 4 .</volume>
        </mixed-citation>
      </ref>
      <ref id="ref13">
        <mixed-citation>
          [35]
          <string-name>
            <given-names>W.</given-names>
            <surname>Yin</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Zubiaga</surname>
          </string-name>
          ,
          <article-title>Towards generalisable hate speech detection: a review on obstacles and solutions</article-title>
          ,
          <source>PeerJ Computer Science</source>
          <volume>7</volume>
          (
          <year>2021</year>
          )
          <article-title>e598</article-title>
          .
          doi:10.7717/peerj-cs.598.
        </mixed-citation>
      </ref>
      <ref id="ref14">
        <mixed-citation>
          [36]
          <string-name>
            <given-names>B.</given-names>
            <surname>Vidgen</surname>
          </string-name>
          , L. Derczynski,
          <article-title>Directions in abusive language training data, a systematic review: Garbage in, garbage out</article-title>
          ,
          <source>PLOS ONE 15</source>
          (
          <year>2021</year>
          )
          <fpage>1</fpage>
          -
          <lpage>32</lpage>
          . URL: https://doi.org/10.1371/journal. pone.
          <volume>0243300</volume>
          .
        </mixed-citation>
      </ref>
      <ref id="ref15">
        <mixed-citation>
          [37]
          <string-name>
            <given-names>T.</given-names>
            <surname>Ranasinghe</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Zampieri</surname>
          </string-name>
          ,
          <article-title>An evaluation of multilingual offensive language identification methods for the languages of india</article-title>
          ,
          <source>Information</source>
          <volume>12</volume>
          (
          <year>2021</year>
          ). URL: https://www.mdpi.
          <source>com/2078-2489/12/8/306. doi:10.3390/info12080306.</source>
        </mixed-citation>
      </ref>
      <ref id="ref16">
        <mixed-citation>
          [38]
          <string-name>
            <given-names>T.</given-names>
            <surname>Ranasinghe</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Zampieri</surname>
          </string-name>
          ,
          <article-title>Multilingual offensive language identification for low-resource languages</article-title>
          ,
          <source>ACM Transactions on Asian and Low-Resource Language Information Processing</source>
          <volume>21</volume>
          (
          <year>2021</year>
          ). URL: https://doi.org/10.1145/3457610.
        </mixed-citation>
      </ref>
      <ref id="ref17">
        <mixed-citation>
          [39]
          <string-name>
            <given-names>G. K.</given-names>
            <surname>Shahi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dirkson</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T. A.</given-names>
            <surname>Majchrzak</surname>
          </string-name>
          ,
          <article-title>An exploratory study of covid-19 misinformation on twitter</article-title>
          ,
          <source>Online social networks and media 22</source>
          (
          <year>2021</year>
          )
          <fpage>100104</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref18">
        <mixed-citation>
          [40]
          <string-name>
            <given-names>G. K.</given-names>
            <surname>Shahi</surname>
          </string-name>
          ,
          <string-name>
            <surname>Amused:</surname>
          </string-name>
          <article-title>An annotation framework of multi-modal social media data</article-title>
          , arXiv preprint arXiv:
          <year>2010</year>
          .
          <volume>00502</volume>
          (
          <year>2020</year>
          ).
        </mixed-citation>
      </ref>
      <ref id="ref19">
        <mixed-citation>
          [41]
          <string-name>
            <given-names>G. K.</given-names>
            <surname>Shahi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Nandini</surname>
          </string-name>
          ,
          <article-title>Fakecovid-a multilingual cross-domain fact check news dataset for covid-</article-title>
          19, arXiv preprint arXiv:
          <year>2006</year>
          .
          <volume>11343</volume>
          (
          <year>2020</year>
          ).
        </mixed-citation>
      </ref>
      <ref id="ref20">
        <mixed-citation>
          [42]
          <string-name>
            <given-names>T.</given-names>
            <surname>Mandl</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Modha</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Majumder</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Patel</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Dave</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Mandlia</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Patel</surname>
          </string-name>
          ,
          <source>Overview of the HASOC track at FIRE</source>
          <year>2019</year>
          :
          <article-title>Hate speech and offensive content identification in Indo-European languages</article-title>
          ,
          <source>in: Proceedings of the 11th Forum for Information Retrieval Evaluation</source>
          ,
          <year>2019</year>
          , pp.
          <fpage>14</fpage>
          -
          <lpage>17</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref21">
        <mixed-citation>
          [43]
          <string-name>
            <given-names>S. S.</given-names>
            <surname>Gaikwad</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Ranasinghe</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Zampieri</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Homan</surname>
          </string-name>
          ,
          <article-title>Cross-lingual offensive language identification for low resource languages: The case of Marathi</article-title>
          ,
          <source>in: Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP)</source>
          ,
          <source>Held Online, 1-3 September</source>
          ,
          <year>2021</year>
          , pp.
          <fpage>437</fpage>
          -
          <lpage>443</lpage>
          . URL: https://aclanthology.org/
          <year>2021</year>
          .ranlp-
          <volume>1</volume>
          .
          <fpage>50</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref22">
        <mixed-citation>
          [44]
          <string-name>
            <given-names>A.</given-names>
            <surname>Hegde</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M. D.</given-names>
            <surname>Anusha</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H. L.</given-names>
            <surname>Shashirekha</surname>
          </string-name>
          ,
          <article-title>Ensemble Based Machine Learning Models for Hate Speech and Offensive Content Identification, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref23">
        <mixed-citation>
          [45]
          <string-name>
            <given-names>S.</given-names>
            <surname>Banerjee</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Sarkar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Agrawal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Saha</surname>
          </string-name>
          ,
          <string-name>
            <surname>M. Das</surname>
          </string-name>
          ,
          <article-title>Exploring Transformer Based Models to Identify Hate Speech and Offensive Content in English and Indo-Aryan Languages, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref24">
        <mixed-citation>
          [46]
          <string-name>
            <given-names>Y.</given-names>
            <surname>Hacohen-Kerner</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Uzan</surname>
          </string-name>
          , Detecting Offensive Language in English, Hindi, and
          <article-title>Marathi using Classical Supervised Machine Learning Methods and Word/Char N-grams, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref25">
        <mixed-citation>
          [47]
          <string-name>
            <given-names>P.</given-names>
            <surname>Mankar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Gangurde</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Chaudhari</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Pawar</surname>
          </string-name>
          ,
          <article-title>Machine Learning Models for Hate Speech and Offensive Language Identification for Indo-Aryan Language: Hindi, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref26">
        <mixed-citation>
          [48]
          <string-name>
            <given-names>M. S.</given-names>
            <surname>Jahan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Oussalah</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. K.</given-names>
            <surname>Mim</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Islam</surname>
          </string-name>
          ,
          <string-name>
            <surname>Offensive Language Identification Using Hindi-English Code-Mixed Tweets</surname>
          </string-name>
          , and
          <article-title>Code-Mixed Data Augmentation, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref27">
        <mixed-citation>
          [49]
          <string-name>
            <given-names>M.</given-names>
            <surname>Bhatia</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T. S.</given-names>
            <surname>Bhotia</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Agarwal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Ramesh</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Gupta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K.</given-names>
            <surname>Shridhar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Laumann</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Dash</surname>
          </string-name>
          , One to Rule Them All:
          <article-title>Towards Joint Indic Language Hate Speech Detection, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref28">
        <mixed-citation>
          [50]
          <string-name>
            <given-names>A.</given-names>
            <surname>Mitra</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Sankhala</surname>
          </string-name>
          ,
          <article-title>Multilingual Hate Speech and Offensive Content Detection using Modified Cross-entropy Loss, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref29">
        <mixed-citation>
          [51]
          <string-name>
            <given-names>Y.</given-names>
            <surname>Kui</surname>
          </string-name>
          ,
          <article-title>Detect Hate and Offensive Content in English and Indo-Aryan Languages based on Transformer, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref30">
        <mixed-citation>
          [52]
          <string-name>
            <given-names>Y.</given-names>
            <surname>Bestgen</surname>
          </string-name>
          ,
          <article-title>A simple language-agnostic yet strong baseline system for hate speech and offensive content identification, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref31">
        <mixed-citation>
          [53]
          <string-name>
            <given-names>M. S.</given-names>
            <surname>Jahan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D. R.</given-names>
            <surname>Beddiar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Oussalah</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Arhab</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Y.</given-names>
            <surname>Bounab</surname>
          </string-name>
          ,
          <article-title>Hate and Offensive language detection using BERT for English Subtask A, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref32">
        <mixed-citation>
          [54]
          <string-name>
            <given-names>A.</given-names>
            <surname>Glazkova</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Kadantsev</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Glazkov</surname>
          </string-name>
          ,
          <article-title>Fine-tuning of Pre-trained Transformers for Hate, Offensive, and Profane Content Detection in English and Marathi, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref33">
        <mixed-citation>
          [55]
          <string-name>
            <given-names>K.</given-names>
            <surname>Adaikkan</surname>
          </string-name>
          , T. Durairaj,
          <article-title>Multilingual Hate speech and Offensive language detection in English, Hindi, and Marathi languages, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref34">
        <mixed-citation>
          [56]
          <string-name>
            <given-names>A.</given-names>
            <surname>Kadam</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Goel</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Jain</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J. S.</given-names>
            <surname>Kalra</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Subramanian</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Reddy</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Kodali</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A. T. H</given-names>
            ,
            <surname>M. Shrivastava</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Kumaraguru</surname>
          </string-name>
          ,
          <article-title>Battling Hateful Content in Indic Languages HASOC '21, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref35">
        <mixed-citation>
          [57]
          <string-name>
            <given-names>S.</given-names>
            <surname>Kalra</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K. N.</given-names>
            <surname>Inani</surname>
          </string-name>
          ,
          <string-name>
            <given-names>Y.</given-names>
            <surname>Sharma</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. S.</given-names>
            <surname>Chauhan</surname>
          </string-name>
          ,
          <article-title>Detection of Hate, Offensive and Profane Content from the Post of Twitter using Transformer-Based Models, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref36">
        <mixed-citation>
          [58]
          <string-name>
            <given-names>A.</given-names>
            <surname>Anand</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Golecha</surname>
          </string-name>
          , B. B,
          <string-name>
            <given-names>B.</given-names>
            <surname>Jayaraman</surname>
          </string-name>
          , M. T. T,
          <article-title>Machine Learning based hate speech identification for English and Indo-Aryan languages, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref37">
        <mixed-citation>
          [59]
          <string-name>
            <given-names>S.</given-names>
            <surname>Agustian</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Saputra</surname>
          </string-name>
          ,
          <string-name>
            <surname>A</surname>
          </string-name>
          . Fadhilah, “
          <article-title>Feature Selection” with Pretrained-BERT for Hate Speech and Offensive Content Identification in English and Hindi Languages, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref38">
        <mixed-citation>
          [60]
          <string-name>
            <given-names>S.</given-names>
            <surname>Chanda</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Ujjwal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Das</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Pal</surname>
          </string-name>
          ,
          <article-title>Fine-tuning Pre-Trained Transformer based model for Hate Speech and Offensive Content Identification in English, Indo-Aryan and Code-Mixed (English-Hindi) languages, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref39">
        <mixed-citation>
          [61]
          <string-name>
            <given-names>N.</given-names>
            <surname>Bölücü</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Canbay</surname>
          </string-name>
          ,
          <article-title>Hate Speech and Offensive Content Identification with Graph Convolutional Networks, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref40">
        <mixed-citation>
          [62]
          <string-name>
            <given-names>A.</given-names>
            <surname>Kumar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P. K.</given-names>
            <surname>Roy</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Saumya</surname>
          </string-name>
          ,
          <article-title>An Ensemble Approach for Hate and Offensive Language Identification in English and Indo-Aryan Languages, in: Forum for Information Retrieval Evaluation (Working Notes) (FIRE), CEUR-WS</article-title>
          .org,
          <year>2021</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref41">
        <mixed-citation>
          [63]
          <string-name>
            <given-names>R.</given-names>
            <surname>Rajalakshmi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Srivarshan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Mattins</surname>
          </string-name>
          , K. E,
          <string-name>
            <given-names>P.</given-names>
            <surname>Seshadri</surname>
          </string-name>
          ,
          <string-name>
            <surname>A. K. M,</surname>
          </string-name>
          <article-title>Conversational Hate-Speech detection in Code-Mixed Hindi-English Tweets</article-title>
          , in: Forum for Information
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>