<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="es">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Overview of TASS 2016 Resumen de TASS 2016</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Miguel</forename><surname>Ángel</surname></persName>
						</author>
						<author>
							<persName><forename type="first">García</forename><surname>Cumbreras</surname></persName>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Julio</forename><forename type="middle">Villena</forename><surname>Román</surname></persName>
							<affiliation key="aff24">
								<address>
									<postCode>28034</postCode>
									<settlement>Sngular, Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff26">
								<orgName type="department">Sngular Data&amp;Analytics</orgName>
								<address>
									<addrLine>Av, Llano Castellano 13, Planta 5</addrLine>
									<postCode>28034</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Eugenio</forename><forename type="middle">Martínez</forename><surname>Cámara</surname></persName>
							<email>emcamara@ujaen.es</email>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Manuel</forename><surname>Carlos</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Díaz</forename><surname>Galiano</surname></persName>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">M</forename><surname>Teresa</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Martín</forename><surname>Valdivia</surname></persName>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">L</forename><forename type="middle">Alfonso</forename><surname>Ureña</surname></persName>
							<email>laurena@ujaen.es</email>
						</author>
						<author>
							<persName><forename type="first">Julio</forename><surname>Villena-Román</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Miguel</forename><forename type="middle">Á</forename><surname>García Cumbreras</surname></persName>
						</author>
						<author>
							<persName><forename type="first">T</forename><forename type="middle">U</forename><surname>Darmstadt</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Manuel</forename><forename type="middle">C</forename><surname>Díaz Galiano</surname></persName>
						</author>
						<author>
							<persName><forename type="first">L</forename><surname>Alfonso</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Ureña</forename><surname>López</surname></persName>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Alexandra</forename><surname>Balahur</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Jose</forename><surname>María</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Gómez</forename><surname>Hidalgo</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Sara</forename><forename type="middle">Lana</forename><surname>Serrano</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Constantine</forename><surname>Orasan</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Jose</forename><surname>Manuel</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Perea</forename><surname>Ortega</surname></persName>
						</author>
						<author>
							<persName><forename type="first">José</forename><forename type="middle">Antonio</forename><surname>Troyano Jiménez</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Antonio</forename><surname>Quirós</surname></persName>
							<email>antonio.quiros@sngular.team</email>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff24">
								<address>
									<postCode>28034</postCode>
									<settlement>Sngular, Madrid</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
							<affiliation key="aff26">
								<orgName type="department">Sngular Data&amp;Analytics</orgName>
								<address>
									<addrLine>Av, Llano Castellano 13, Planta 5</addrLine>
									<postCode>28034</postCode>
									<settlement>Madrid</settlement>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Isabel</forename><surname>Segura-Bedmar</surname></persName>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Paloma</forename><surname>Martínez</surname></persName>
							<affiliation key="aff23">
								<orgName type="institution">Universidad de Jaén</orgName>
								<address>
									<postCode>23071</postCode>
									<settlement>Jaén</settlement>
									<country key="ES">Spain</country>
								</address>
							</affiliation>
							<affiliation key="aff25">
								<orgName type="department">Departamento de Informática</orgName>
								<orgName type="institution">Universidad Carlos III de Madrid Avd. de la Universidad</orgName>
								<address>
									<addrLine>30</addrLine>
									<postCode>28911</postCode>
									<settlement>Leganés</settlement>
									<region>Madrid</region>
									<country key="ES">España</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Jhon</forename><surname>Adrián Cerón-Guzmán</surname></persName>
						</author>
						<author>
							<affiliation key="aff0">
								<orgName type="laboratory">Evaluación de Modelos de Representación del Texto con Vectores de Dimensión Reducida para Análisis de Sentimiento Edgar Casasola Murillo</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff1">
								<orgName type="institution">Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff2">
								<orgName type="institution">Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff3">
								<orgName type="institution">Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff4">
								<orgName type="institution">Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff5">
								<orgName type="institution">Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff6">
								<orgName type="institution">Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff7">
								<orgName type="institution">Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff8">
								<orgName type="institution">M. Teresa Martín Valdivia Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff9">
								<orgName type="institution">L. Alfonso Ureña López Universidad de Jaén</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff10">
								<orgName type="laboratory">EC-Joint Research Centre (Italia</orgName>
								<orgName type="institution">José Carlos Cortizo Universidad Europea de Madrid (España)</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff11">
								<orgName type="department">Lluís F</orgName>
								<orgName type="institution" key="instit1">José Carlos González-Cristobal Universidad Politécnica de Madrid (España</orgName>
								<orgName type="institution" key="instit2">Hurtado Universidad de Valencia (España</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff12">
								<orgName type="department">Carlos A</orgName>
								<orgName type="institution">Iglesias Fernández Universidad Politécnica de Madrid (España</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff13">
								<orgName type="department">Zornitsa Kozareva Information Sciences Institute (EE.UU</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff14">
								<orgName type="institution" key="instit1">Universidad Politécnica de Madrid (España</orgName>
								<orgName type="institution" key="instit2">Mitkov University of Wolverhampton</orgName>
								<orgName type="institution" key="instit3">Reino Unido</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff15">
								<orgName type="institution">Andrés Montoyo Universidad de Alicante (España</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff16">
								<orgName type="institution">Rafael Muñoz Universidad de Alicante (España</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff17">
								<orgName type="institution">University of Wolverhampton (Reino Unido)</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff18">
								<orgName type="institution">Universidad de Extremadura (España</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff19">
								<orgName type="institution">Ferran Pla Santamaría Universidad de Valencia (España) María Teresa Taboada Gómez Simon Fraser University</orgName>
								<address>
									<settlement>Canadá</settlement>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff20">
								<orgName type="institution">Thelwall University of Wolverhampton (Reino Unido</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff21">
								<orgName type="department">CEUR Workshop Proceedings</orgName>
								<orgName type="institution">Universidad de Sevilla (España)</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff22">
								<orgName type="laboratory">Evaluación de Modelos de Representación del Texto con Vectores de Dimensión Reducida para Análisis de Sentimiento Edgar Casasola Murillo</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff27">
								<orgName type="department">Análisis de Sentimiento</orgName>
								<address>
									<addrLine>Vectores de palabras</addrLine>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff28">
								<orgName type="institution">Santiago de Cali</orgName>
								<address>
									<settlement>Valle del Cauca</settlement>
									<country key="CO">Colombia</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Overview of TASS 2016 Resumen de TASS 2016</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">395AE1833BBC708C1AD0F021A8378DFC</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-24T10:55+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>TASS 2016</term>
					<term>análisis de opiniones</term>
					<term>medios sociales TASS 2016</term>
					<term>sentiment analysis</term>
					<term>social media Sentiment Analysis</term>
					<term>Word embeddings Análisis de sentimientos</term>
					<term>clasificación de polaridad</term>
					<term>combinación de clasificadores</term>
					<term>normalización léxica</term>
					<term>tuis en español</term>
					<term>Twitter Ensemble classifier</term>
					<term>lexical normalization</term>
					<term>polarity classification</term>
					<term>sentiment analysis</term>
					<term>Spanish tweets</term>
					<term>Twitter</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Resumen: Este artículo describe la quinta edición del taller de evaluación experimental TASS 2016, enmarcada dentro del Congreso Internacional SEPLN 2016. El principal objetivo de TASS es promover la investigación y el desarrollo de nuevos algoritmos, recursos y técnicas para el análisis de sentimientos en medios sociales (concretamente en Twitter), aplicado al idioma español. Este artículo describe las tareas propuestas en TASS 2016, así como el contenido de los corpus utilizados, los participantes en las distintas tareas, los resultados generales obtenidos y el análisis de estos resultados.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="es">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Agradecimientos</head><p>La organización de TASS ha contado con la colaboración de investigadores que participan en los siguientes proyectos de investigación:</p><p>• REDES (TIN2015-65136-C2-1-R) CEUR Workshop Proceedings ISSN: 1613-0073</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Preámbulo</head><p>Actualmente el español es la segunda lengua materna del mundo por número de hablantes tras el chino mandarín, y la segunda lengua mundial en cómputo global de hablantes. Esa segunda posición se traduce en un 6,7% de población mundial que se puede considerar hispanohablante. La presencia del español en el mundo no tiene una correspondencia directa con el nivel de investigación en el ámbito del Procesamiento del Lenguaje Natural, y más concretamente en la tarea que nos atañe, el Análisis de Opiniones. Por consiguiente, el Taller de Análisis de Sentimientos en la SEPLN <ref type="bibr">(TASS)</ref> tiene como objetivo la promoción de la investigación del tratamiento del español en sistemas de Análisis de Opiniones, mediante la evaluación competitiva de sistemas de procesamiento de opiniones.</p><p>En la edición de 2016 han participado 7 equipos, de los que 6 han enviado un artículo describiendo el sistema que han presentado, habiendo sido aceptados los 6 artículos tras ser revisados por el comité organizador. La revisión se llevó a cabo con la intención de publicar sólo aquellos que tuvieran un mínimo de calidad científica.</p><p>La edición de 2016 tendrá lugar en el seno del XXXII Congreso Internacional de la Sociedad Española para el Procesamiento del Lenguaje Natural, que se celebrará el próximo mes de septiembre en Salamanca (España) dentro del V Congreso Español de Informática <ref type="bibr">(CEDI 2016)</ref>.</p><p>Septiembre de 2016 Los editores CEUR Workshop Proceedings ISSN: 1613-0073</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Preamble</head><p>Currently Spanish is the second native language in the world by number of speakers after the Mandarin Chinese. This second position means that the 6.7% of the world population is Spanish-speaking. The presence of the Spanish language in the world has not a direct correspondence with the number of research works related to the treatment of Spanish language in the context of Natural Language Processing, and specially in the field of Sentiment Analysis. Therefore, the Workshop on Sentiment Analysis at SEPLN <ref type="bibr">(TASS)</ref> aims to promote the research of the treatment of texts written in Spanish in Sentiment Analysis systems by means of the competitive assessment of opinion processing systems.</p><p>Seven teams have participated in the 2016 edition of the workshop. Six of the seven teams have submitted a description paper of their systems. After a review process, the organizing committee has accepted the 6 papers, because all of them reached an acceptable scientific quality level.</p><p>The 2016 edition will be held at the 32 nd International Conference of the Spanish Society for Natural Language Processing (SEPLN 2016), which will take place at Salamanca in September framed by the 5 th Spanish Conference of Computer Science <ref type="bibr">(CEDI 2016)</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>September 2016</head><p>The editors</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>TASS is an experimental evaluation workshop, a satellite event of the annual SEPLN Conference, with the aim to promote the research on Sentiment Analysis in social media focused on the Spanish language. The fifth edition will be held on September 13th, 2016 at the University of Salamanca, Spain.</p><p>Sentiment Analysis (SA) is traditionally defined as the computational treatment of opinion, sentiment and subjectivity in texts <ref type="bibr">(Pang &amp; Lee, 2008)</ref>. However, <ref type="bibr" target="#b0">Cambria and Hussain (2012)</ref> offer a more updated definition: Computational techniques for the extraction, classification, understanding and evaluation of opinions and comments published on the Internet and other kind of user generated contents. It is a hard task because even humans often disagree on the polarity of a given text. And it is a harder task when the text has only 140 characters (Twitter messages or tweets).</p><p>Although SA is not a new task, it is still challenging, because the state of the art has not yet resolved some problems related to multilingualism, domain adaptation, text genre adaptation and polarity classification at fine grained level. Polarity classification has usually been tackled following two main approaches. The first one applies machine learning algorithms in order to train a polarity classifier using a labelled corpus <ref type="bibr" target="#b19">(Pang et al. 2002)</ref>. This approach is also known as the supervised approach. 
The second one is known as semantic orientation, or the unsupervised approach, and it integrates linguistic resources in a model in order to identify the valence of the opinions <ref type="bibr">(Turney 2002)</ref>.</p><p>The aim of TASS is to provide a competitive forum where the newest research works in the field of SA in social media, specifically focused on Spanish tweets, are described and discussed by scientific and business communities.</p><p>The rest of the paper is organized as follows. Section 2 describes the different corpus provided to participants. Section 3 shows the different tasks <ref type="bibr" target="#b46">of TASS 2016</ref>. Section 4 describes the participants and the overall results are presented in Section 5. Finally, the last section shows some conclusions and future directions.</p><p>2 Corpus <ref type="bibr" target="#b46">TASS 2016</ref> experiments are based on two corpora, specifically built for the different editions of the workshop.</p><p>The two corpora will be made freely available to the community after the workshop. Please send an email to tass@sngularmeaning.team filling in the TASS Corpus License agreement with your email, affiliation (institution, company or any kind of organization) and a brief description of your research objectives, and you will be given a password to download the files in the password protected area. The only requirement is to include a citation to a relevant paper and/or the TASS website.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1">General corpus</head><p>The General Corpus contains over 68.000 tweets, written in Spanish, about 150 wellknown personalities and celebrities of the world of politics, economy, communication, mass media and culture, between November 2011 and March 2012. Although the context of extraction has a Spanish-focused bias, the diverse nationality of the authors, including people from Spain, Mexico, Colombia, Puerto Rico, USA and many other countries, makes the corpus reach a global coverage in the Spanishspeaking world. Each tweet includes its ID (tweetid), the creation date (date) and the user ID (user). Due to restrictions in the Twitter API Terms of Service (https://dev.twitter.com/terms/apiterms), it is forbidden to redistribute a corpus that includes text contents or information about users. However, it is valid if those fields are removed and instead IDs (including Tweet IDs and user IDs) are provided. The actual message content can be easily obtained by making queries to the Twitter API using the tweetid.</p><p>The general corpus has been divided into training set (about 10%) and test set (90%). The training set was released, so the participants could train and validate their models. The test corpus was provided without any tagging and has been used to evaluate the results.</p><p>Obviously, it was not allowed to use the test data from previous years to train the systems.</p><p>Each tweet was tagged with its global polarity (positive, negative or neutral sentiment) or no sentiment at all. A set of 6 labels has been defined: strong positive (P+), positive (P), neutral (NEU), negative (N), strong negative (N+) and one additional no sentiment tag (NONE).</p><p>In addition, there is also an indication of the level of agreement or disagreement of the expressed sentiment within the content, with two possible values: AGREEMENT and DISAGREEMENT. 
This is especially useful to make out whether a neutral sentiment comes from neutral keywords or else the text contains positive and negative sentiments at the same time.</p><p>Moreover, the polarity values related to the entities that are mentioned in the text are also included for those cases when applicable. These values are similarly tagged with 6 possible values and include the level of agreement as related to each entity.</p><p>This corpus is based on a selection of a set of topics. Thematic areas such as "política" ("politics"), "fútbol" ("soccer"), "literatura" ("literature") or "entretenimiento" ("entertainment"). Each tweet in the training and test set has been assigned to one or several of these topics (most messages are associated to just one topic, due to the short length of the text).</p><p>The annotation has been semi-automatically done: a baseline machine learning model is first run and then all tags are checked by human experts. In the case of the polarity at entity level, due to the high volume of data to check, the human annotation has only been done for the training set.</p><p>Table <ref type="table" target="#tab_0">1</ref> shows a summary of the training and test corpora provided to participants. Users were journalists (periodistas), politicians (políticos) or celebrities (famosos). The only language involved was Spanish (es).</p><p>The list of topics that have been selected is the following:</p><p>• Politics (política)</p><p>• Entertainment (entretenimiento)</p><p>• Economy (economía)</p><p>• Music (música)</p><p>• Soccer (fútbol)</p><p>• Films (películas)</p><p>• Technology (tecnología)</p><p>• Sports (deportes)</p><p>• Literature (literatura)</p><p>• Other (otros)</p><p>The corpus is encoded in XML. Figure <ref type="figure" target="#fig_0">1</ref> shows the information of two tweets. The first tweet is only annotated with the polarity at tweet level because there is not any entity in the text. 
However, the second one is annotated with the global polarity of the message and the polarity associated to each of the entities that appear in the text (UPyD and Foro Asturias). system, environmental policy... Each aspect is related to one or several entities that correspond to one of the main political parties in Spain, which are:</p><formula xml:id="formula_0">• Partido_Popular (PP) • Partido_Socialista_Obrero_Español (PSOE) • Izquierda_Unida (IU) • Podemos • Ciudadanos (C's) • Unión_Progreso_y_Democracia (UPyD)</formula><p>Each tweet in the corpus has been manually annotated by two annotators, and a third one in case of disagreement, with the sentiment polarity at aspect level. Sentiment polarity has been tagged from the point of view of the person who writes the tweet, using 3 levels: P, NEU and N. Again, no difference is made between no sentiment and a neutral sentiment (neither positive nor negative). Each political aspect is linked to its correspondent political party and its polarity.    </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Description of tasks</head><p>Since the first edition of TASS, a new task and a new corpus have been published. However, one of the aims of TASS is the evaluation of the progress of the research on SA. Thus, the edition of 2016 was focused on the analysis and the comparison of the systems with the submissions of previous editions. The edition of 2016 was focused on two tasks: polarity classification at tweet level and polarity classification at entity level. The polarity classification task has been proposed with the same corpus since the first edition of TASS, but the polarity classification at aspect level has been proposed with a different corpus each edition. In the edition of 2016 the classification at aspect level uses the STOMPOL corpus, which was published the first time in the edition of 2015.</p><p>Participants are expected to submit up to 3 results of different experiments for one or both of these tasks, in the appropriate format described below.</p><p>Along with the submission of experiments, participants have been invited to submit a paper to the workshop in order to describe their experiments and discussing the results with the audience in a regular workshop session.</p><p>The two proposed tasks are described next.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1">Task 1: Sentiment Analysis at Global Level</head><p>This task consists on performing an automatic polarity classification to determine the global polarity of each message in the test set of the General Corpus. The training set of the corpus was provided to the participants with the aim they could train and validate their models with it. There were two different evaluations: one based on 6 different polarity labels (P+, P, NEU, N, N+, NONE) and another based on just 4 labels (P, N, NEU, NONE).</p><p>Participants are expected to submit (up to 3) experiments for the 6-labels evaluation, and they are also allowed to submit (up to 3) specific experiments for the 4-labels scenario.</p><p>Results must be submitted in a plain text file with the following format:</p><p>tweetid \t polarity where polarity can be:</p><p>• P+, P, NEU, N, N+ and NONE for the 6-labels case • P, NEU, N and NONE for the 4-labels case.</p><p>The same test corpus of previous years was used for the evaluation in order to develop a comparison among the systems. The accuracy is one of the measures used to evaluate the systems, however due to the fact that the training corpus is not totally balanced the systems were also assessed by the macroaveraged precision, macro-averaged recall and macro-averaged F1-measure.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2">Task 2: Aspect-based sentiment analysis</head><p>A corpus with the entities and the aspects identified was provided to the participants, so the goal of the systems is the inference of the polarity at the aspect-level. As in 2015, STOMPOL corpus was the corpus used in this task. STOMPOL was divided into training and test sets, the first one for the development and validation of the systems, and the second for evaluation.</p><p>Participants are expected to submit up to 3 experiments for each corpus, each in a plain text file with the following format: tweetid \t aspect-entity \t polarity Allowed polarity values are: P, N and NEU. For the evaluation, a single label combining "aspect-polarity" has been considered. As in the first task, accuracy, macro-averaged precision, macro-averaged recall and macro-averaged F1-measure have been calculated for the global result.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Participants and Results</head><p>This year 7 (7 last year) groups submitted their systems. The list of active participant groups is shown in Table <ref type="table" target="#tab_3">3</ref>, including the tasks in which they have participated.</p><p>Six of the seven participant groups sent a report describing their experiments and results achieved. Papers were reviewed and included in the workshop proceedings. References are listed in Table <ref type="table" target="#tab_4">4</ref>.  </p><formula xml:id="formula_1">Group 1 2 jacerong X ELiRF-UPV X X LABDA X INGEOTEC X GASUCR X GTI X SINAI_w2v X Total 6 1</formula></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5">Results</head><p>This section will be focused on the description and the analysis of the results and the systems submitted by the participants.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.1">Task 1: Sentiment Analysis at Global Level</head><p>Submitted runs and results for Task 1, evaluation based on 5 polarity levels with the whole General test Corpus are shown in Table <ref type="table" target="#tab_6">5</ref>. Accuracy, macro-averaged precision, macro-averaged recall and macro-averaged F1-measure have been used to evaluate each individual label and to rank the systems.  In order to perform a more in-depth evaluation, results are calculated considering the classification only in 3 levels (POS, NEU,  NEG) and no sentiment (NONE) merging P and P+ in only one category, as well as N and N+ in another one. The results reached by the submitted systems are shown in Table <ref type="table" target="#tab_8">6</ref>.  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.2">Task 2: Aspect-based Sentiment Analysis</head><p>Submitted runs and results for Task 2, with the STOMPOL corpus, are shown in Table <ref type="table" target="#tab_10">7</ref>. Accuracy, macro-averaged precision, macro-averaged recall and macro-averaged F1-measure have been used to evaluate each individual label and to rank the systems.  </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.3">Description of the systems</head><p>The systems submitted in the edition of 2016 represent the next step of the ones submitted in the previous edition. The systems may be clustered into two groups: those that rely on the classification power of the ensemble of several base classifiers, and those systems that replace the traditional Bag-of-Words model with vectors of word embeddings in order to represent the meaning of each word. In the subsequent paragraphs the main features of the systems submitted are going to be depicted. <ref type="bibr" target="#b39">Hurtado and Pla (2016)</ref> describe the participation of the team ELiRF-UPV in the two tasks <ref type="bibr" target="#b46">of TASS 2016</ref>. The only difference between the systems submitted for the two tasks is the fact that the one focused on the second task has a module for the identification of the context of each of the entities and aspects annotated on the tweets. The polarity classification system relies on the ensemble of 192 configurations of SVM classifiers. For the combination of the set of classifiers they evaluate the performance of an approach based on voting and another based on stacking.</p><p>The system depicted in (Cerón-Guzmán, 2016) is also based on an approach of ensemble classifiers. In this case the base classifiers are based on logistic regression and they are combined by voting. <ref type="bibr">Alvarez et al. (2016)</ref> exposed the participation of the team GTI on the task 2. The system is similar to the system of the team ELiRF-UPV in the sense that it is composed of two layers: context identification and polarity classification. Regarding the identification of the context, the authors design a heuristic method based on lexical markers. 
The polarity classification system is an SVM classifier that uses different types of features in order to represent the contexts of the entities and the aspects.</p><p>Montejo-Ráez and Díaz-Galiano (2016) introduce a system based on a supervised learning algorithm over vectors resulting from a weighted vector. This vector is computed using a Word2Vec algorithm. This method, which is inspired by neural-network language modelling, was executed with a collection of tweets written in Spanish and the Spanish Wikipedia in order to generate a set of word embeddings for the representation of the words of the General Corpus of TASS as dense vectors. The creation of the collection of tweets written in Spanish followed a distant supervision approach by means of the assumption that tweets with happy and sad emoticons express emotions or opinions. Their experiments show that massive data from Twitter can lead to a slight improvement in classification accuracy.</p><p>The system presented by the team LABDA (Quirós, Segura-Bedmar and Paloma <ref type="bibr">Martínez, 2016)</ref> is similar to the one submitted by SINAI <ref type="bibr" target="#b3">(Montejo-Ráez and Díaz-Galiano, 2016)</ref> because it also used word embeddings as the schema of representation of the meaning of the words of the tweets. Quirós, Segura-Bedmar and Paloma <ref type="bibr">Martínez (2016)</ref> assessed the performance of the SVM and Logistic Regression as classifiers.</p><p>Casasola Murillo and Marín Reventós (2016) submitted an unsupervised system based on the system described in Turney ( <ref type="formula">2002</ref>), but with a specific adaptation to the classification of tweets written in Spanish.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.4">Analysis</head><p>Table <ref type="table" target="#tab_8">5 and Table 6</ref> show the results of each system, ranked by the F1-score reached, so it is not hard to know which is the best system in the edition of 2016.</p><p>On the other hand, how many tweets were rightly classified by the submitted systems? Is there a set of tweets that were not rightly classified by any system? What are the most difficult tweets to classify? These questions are going to be answered in the following paragraphs.</p><p>Table <ref type="table" target="#tab_12">8</ref> shows the rate of tweets that are rightly classified by a number of systems. There are about 6% of tweets whose polarity is not inferred by any of the submitted systems. In other words, the submitted systems in the edition of 2016 are able to classify about 94% of the test set. So, what are the main features of that 6% of tweets whose polarity no system inferred?     All the systems submitted are based on linear classifiers that do not take into account the context of each word, which means a big drawback for understanding the meaning of a span of text.</p><p>The tweets of the Figures <ref type="figure" target="#fig_5">3, 4 and 5</ref> show that opinions and emotions are not only expressed by lexical markers, so the future participants should take into account the challenging task of implicit opinion analysis, irony and sarcasm detection. These new problems may be framed on the semantic level of Natural Language Processing and should be tackled by the research community in order to go a step further in the understanding of the subjective information, which is continuously published on the Internet.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Id: 171304000392663040</head><p>Sacarle 17 puntos en la final de Copa al Barça CB en el Palau Sant Jordi es una pasada. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6">Conclusions and Future Work</head><p>TASS was the first workshop about SA focused on the processing of texts written in Spanish. In the three first editions of TASS, the research community was mainly formed by Spanish researchers; however, since the last edition, the number of researchers that come from South America has been growing, so it is evidence that the research community of Sentiment Analysis in Spanish is not only located in Spain but is formed by the Spanish-speaking countries.</p><p>Anyway, the developed corpus and gold standards, and the reports from participants will for sure be helpful for knowing the state of the art in SA in Spanish.</p><p>The future work will be mainly focused on the definition of a new General Corpus because of the following reasons: 1. The language used on Twitter changes faster than the language used in traditional genres of texts, so the update of the corpus is required in order to cover a real use of the language on Twitter. 2. After several editions of the workshop, we realize that the quality of the annotation is not extremely good, so it is required to define a new corpus with a high quality annotation in order to provide a real gold standard for Spanish SA on Twitter.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">The research community deeply knows the</head><p>General Corpus of TASS and it wants a new challenge. A significant amount of new tasks is currently being defined in Natural Language Processing, so some of them, such as stance classification, will be studied to be proposed for the next edition of TASS. Resumen: Se describe el sistema para análisis de sentimiento desarrollado por el Grupo de Análisis de Sentimiento GAS-UCR de la Universidad de Costa Rica para la tarea 1 del workshop TASS 2016. El sistema propuesto está basado en el uso de vectores de características de baja dimensión para representación del texto. Se propone un modelo simple fundamentado en la normalización de texto con identificación de marcadores de énfasis, el uso de modelos de lenguaje para representar las características locales y globales del texto, y características como emoticones y partículas de negación. Los primeros experimentos muestran las mejoras que se obtienen en la precisión al identificar la polaridad de textos completos conforme se van incorporando las características aquí mencionadas. Palabras clave: análisis de sentimiento, clasificación de textos por polaridad, textos cortos Abstract: The Sentiment Analysis System developed by GAS-UCR team of the University of Costa Rica for task 1 of TASS 2016 workshop is presented. Preliminary evaluation results of the proposed Sentiment Analysis System are presented. The system is based on low dimension feature vectors for text representation. The proposed model is based on text normalization with emphasis mark identification, the use of local and global language models, and other features like emoticons and negation terms. Initial experimentation shows that the introduction of the selected features has a positive impact on precision at the polarity classification task. Keywords: sentiment analysis, polarity-based text classification, short texts.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introducción</head><p>Este trabajo tiene como propósito describir el sistema utilizado por el grupo de investigación en análisis de sentimiento de la Universidad de Costa Rica en su participación en el taller TASS2016 <ref type="bibr">(García-Cumbreras et al., 2016)</ref>. El enfoque del trabajo del grupo ha sido el estudio de los factores que van incidiendo en las mejoras en la precisión obtenida al llevar a cabo la clasificación de la polaridad de tweets en idioma español. Nuestro sistema se fundamenta en tres elementos básicos que son: la normalización del texto en la etapa de preprocesamiento identificando los poten- * Este trabajo se ha llevado a cabo gracias al apoyo económico de la Universidad de Costa Rica y el Gobierno de la República de Costa Rica a través del MICITT. Se agradece a los asistentes del grupo de investigación GAS-UCR por su trabajo ciales marcadores de énfasis presentes en el mismo, la creación de vectores de características de dimensión reducida para disminuir el efecto de la dispersión de los datos, y la exploración del impacto del uso de diccionarios de polaridad que se generan mediante la utilización de diferentes modelos de representación del lenguaje asociados tanto al contexto local como global de los datos. Para esto estamos utilizando una adaptación propia del algoritmo de Turney (Turney, 2002)sobre un corpus de 5 millones de tweets en español. Estos modelos se almacenan en forma de diccionarios con polaridad para su posterior reutilización. 
Nos interesa particularmente la investigación en este campo dado que si bien desde el año 2013 se identificó una brecha importante entre la cantidad de investigación y tecnología del lenguaje desarrollada para el idioma inglés y el español <ref type="bibr">(Cambria et al., 2013</ref><ref type="bibr">) (Melero et al., 2012)</ref>, de la misma forma debemos tener presente que no necesariamente las soluciones para español peninsular van a tener los mismos resultados al aplicarse a variantes de español americano, por lo que los recursos y métodos que utilizamos tienen la intención de aportar a la investigación en español y colaborar para su posterior aplicación en otros contextos de habla hispana.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Antecedentes</head><p>Entre los resultados obtenidos con sistemas con enfoques basados en aprendizaje máquina, el uso de máquina de soporte vectorial (MSV) ha ofrecido buenos resultados tanto en inglés (Kiritchenko, Zhu, y Mohammad, 2014) y (Batista y Ribeiro, 2013) como en español donde 9 de los 14 sistemas para el español presentados en TASS2015 <ref type="bibr">(Villena-Román et al., 2015)</ref> hacían uso de este tipo de clasificador. Sin embargo, la dependencia del lenguaje hace que estos clasificadores dependan de los vectores de características con los que son representados los comentarios de texto. Esta extracción de características ha sido el foco de atención de múltiples trabajos como (Cabanlit y Junshean Espinosa, 2014) , <ref type="bibr">(Feldman, 2013)</ref>, (Guo y Wan, 2012), (Sharma y Dey, 2012) y <ref type="bibr" target="#b66">(Wang et al., 2011)</ref>. En trabajos recientes de análisis de sentimiento en español tales como el trabajo de <ref type="bibr">(Martínez-Cámara et al., 2015)</ref> se utilizan varios diccionarios de polaridad y se representan utilizando un modelo de espacio vectorial MEV. El diccionario en sí se convierte en un modelo de lenguaje que sirve como recurso para lograr representaciones eficientes de los vectores utilizados para la clasificación.</p><p>En los últimos años la representación vectorial basada en modelos de lenguaje como unigramas y bigramas se movió hacia representaciones de características ya que la cantidad de términos introduce un problema asociado a su alta dispersión en el vector <ref type="bibr">(Cambria et al., 2013)</ref>. Si los vectores contienen un alto número de atributos diferentes, uno por término, los conjuntos de datos para entrenamiento deben contener una mayor cantidad de textos anotados que atributos para un buen entrenamiento de los clasificadores. 
Es por esto que los modelos de representación del lenguaje basados en unigramas, bigramas o bien skipgramas requieren de una representación vectorial eficiente. Trabajos recientes buscan la representación vectorial de las palabras en el espacio continuo como es el caso del uso de Word2Vec (Díaz-Galiano y Montejo-Ráez, 2015).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Descripción del sistema</head><p>Nuestro sistema se fundamenta en cuatro elementos que consideramos importantes de mencionar. Primero nos referiremos a la forma en que construimos nuestro diccionario con la polaridad de los términos y las razones para haber construido uno propio. Posteriormente nos referimos a nuestro proceso de preprocesamiento e identificación de potenciales marcadores de énfasis durante esta etapa inicial. En la siguiente subsección explicamos la forma en que construimos vectores de baja dimensión con información y hacemos uso del diccionario. Finalmente se menciona la forma en que se pretende capturar en los vectores de características aspectos locales con respecto a los datos de entrenamiento, y globales, a partir de modelos de representación del lenguaje general.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1">Creación del diccionario polarizado</head><p>Decidimos desarrollar diccionarios de polaridad propios, en lugar de utilizar los existentes, ya que consideramos que desde el punto de vista del procesamiento de lenguaje natural tradicional (Indurkhya y Damerau, 2010) estos diccionarios con polaridad pueden ser vistos cada uno, como un modelo de lenguaje particular. Por este motivo tratamos de desarrollar y evaluar una adaptación del tradicional método de generación de estos recursos lingüísticos <ref type="bibr">de (Turney, 2002)</ref>. La decisión anterior no se debió a la no existencia de diccionarios polarizados ya que claramente en trabajos como <ref type="bibr">(Martínez-Cámara et al., 2015)</ref> se hace uso de varios de ellos, sino con el fin de incorporar la etapa de creación de diccionario dentro de la metodología de trabajo para que posteriores investigaciones en otros países de habla hispana puedan replicar el trabajo y disminuir la barrera inicial asociada a la falta de recursos lingüísticos propios y el efecto del uso del diccionario polarizado sobre la calidad de los resultados de clasificación.</p><p>El diccionario de polaridad creado utiliza un corpus recolectado durante el año 2013, con 5 millones de tweets en español. La variante con respecto al algoritmo propuesto por Turney (Turney, 2002) es la siguiente. Para el cálculo de la orientación semántica de un término, tal y como lo define Turney en su artículo original, se utilizaron grupos de palabras semilla en lugar de un solo término, y en lugar de utilizar consultas a motores de búsqueda para obtener la cantidad de textos donde aparecen las palabras analizadas cerca de las palabras positivas o negativas se utilizó el motor de búsqueda implementado con el software libre Solr http://lucene.apache.org/solr/. Con el motor se indexaron los 5 millones de tweets por lo que las consultas se ejecutaron en forma local. 
Este método cuenta con la ventaja de que se puede calcular entonces la orientación semántica de un término directamente o bien almacenarlo en un diccionario. En nuestro caso precalculamos la polaridad y la almacenamos en forma de diccionario. Por el momento solo se han llevado a cabo los cálculos para términos individuales.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2">Normalizador de texto con marcadores de énfasis</head><p>Luego de un proceso de análisis de las características presentes en el texto desarrollamos un sistema para normalización del texto. Para este preprocesamiento se segmentan los términos potenciales, signos de puntuación y emoticones. Se lleva a cabo un marcado y conversión de los términos. El proceso que seguimos hace una eliminación de los términos que son identificados en el diccionario. Este proceso se muestra en la figura 1. Las repeticiones de letras, repeticiones de sílabas y mayúsculas son identificadas y eliminadas pero estos términos se marcan como potenciales identificadores de énfasis. Ejemplos son: EXCELENTE, graciassss, buenisísimo. En esta fase se identifican los tweets que contienen palabras positivas con énfasis para su posterior uso.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.3">Representación vectorial de baja dimensión</head><p>Dos características representadas en los vectores tienen que ver con la presencia y polaridad de los emoticones y con la presencia de partículas de negación. Además, al desarrollar esta investigación se pudo observar que los términos positivos con marcadores de énfasis son un potencial identificador de la polaridad positiva de los textos que los contienen, por lo tanto esta característi-Figura 1: Proceso de normalizacion del texto ca también fue incorporada. La presencia de marcadores de énfasis tales como repetición de caracteres, de sílabas, o mayúsculas sobre términos que aparecen como negativos en algún contexto son registrados como una característica importante en el vector. Los vectores generados utilizan la polaridad de los términos para determinar la posición en el vector de características creado. Cabe dejar claro que dependiendo del modelo de datos los términos pueden ser unigramas, bigramas o skipgramas. En el caso de los unigramas, por ejemplo, si se construye un vector con la frecuencia de los términos según su polaridad con valores de polaridad desde -1.0 hasta 1.0, el vector que se obtiene sería como el que se muestra en la figura 2. En este vector por ejemplo se muestran dos términos con polaridad, según diccionario, entre el -0.8 y -0.9, un término con polaridad entre 0.1 y 0.2, y otro con polaridad mayor a 0.9. En este caso, en nuestro diccionario, la polaridad se representa con valores distribuidos desde lo más negativo hasta lo positivo con valores entre -1.0 y 0 para los negativos y 0 a 1.0 para los positivos.</p><p>Para el taller TASS2016 quisimos evaluar inicialmente el uso de vectores con la menor dimensión posible, así que en lugar de vectores de 20 celdas utilizamos solo vectores de 5 celdas para cada grupo de características, en lugar de saltos de 0.1 el rango utilizado es de Figura 2: Vector de características 0.5.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.4">Modelos locales y globales de representación del lenguaje</head><p>Nuestra propuesta pretende representar en los vectores de características información propia obtenida durante el proceso de entrenamiento, al igual que datos que representen información obtenida de modelos de lenguaje del español en general. En nuestro caso se utilizó inicialmente el diccionario generado a partir del corpus recolectado como insumo para obtener de él la información general del español. En el momento de entrenamiento, la polaridad de los términos en cada tweet son conocidos para ese conjunto de datos. La información global es la que se ha calculado previamente y se encuentra almacenada en forma de diccionarios. En nuestra propuesta lo que queremos hacer es representar en el vector las frecuencias de los términos de cada tweet distribuidos según su polaridad pero utilizar diferentes modelos de representación de lenguaje para llevar a cabo este cálculo. El diccionario utilizado en estos experimentos fue nuestra versión con unigramas. Se pretende utilizar representaciones con bigramas y una versión de skipgramas que incluye solo los términos anteriores a la palabra que se desea representar. Durante el entrenamiento, la polaridad obtenida en forma local es almacenada al igual que las frecuencias tomadas de diccionarios de polaridad global. Por lo tanto, los vectores cuentan con entradas para las distribuciones de polaridad local y las distribuciones de polaridad global. Aquí es donde incorporamos los diferentes modelos de lenguaje. Inicialmente trabajamos con unigramas para obtener resultados base para posteriores experimentos. Posteriormente, se genera un diccionario para bigramas y otro para lo que definimos como skip-gramas previos. Por el momento estas variantes no fueron enviadas como experimentos a TASS2016 sino solo las versiones iniciales.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Metodología</head><p>Utilizando el diccionario, el normalizador y el modelo de representación vectorial se procedió a crear vectores de representación con diferentes configuraciones. Primeramente se construyó una versión con vectores de dimensión 20 distribuyendo la polaridad de los términos según la polaridad almacenada para unigramas en el diccionario local. En este caso se pretende evaluar solamente el uso del diccionario y los marcadores de énfasis como repeticiones y mayúsculas. Este primer experimento es el denominado GASUCR-01. El segundo experimento consistió en evaluar un modelo un poco más robusto a nivel local con bigramas y la polaridad para el unigrama en el diccionario, si el bigrama no está presente durante el proceso de evaluación. En este caso se crearon vectores de menor dimensión para los datos locales, con solo cinco campos. Esta ejecución se identificó como experimento GASUCR-01-noEMO-noPartNeg. Esta es la implementación base para luego evaluar el uso de bigramas tomados del contexto global. Esta versión base también fue enviada a la tarea de 4 categorías. En este caso, lo que se hizo fue unir las categorías +P y P en una sola, y la categoría +N con la N. El tercer experimento agregaba al anterior el uso de los emoticones, aparición de términos positivos con énfasis y las partículas negativas. En los resultados esta versión se identificó como GASUCR-04. En esta versión de TASS no nos dio tiempo de ejecutar las versiones con bigramas globales, ni skipgramas. Estos casos se fueron seleccionando para ir evaluando en forma incremental cada uno de los aspectos relacionados a nuestra propuesta. Con cada característica nueva se trata de determinar su impacto sobre los valores de exactitud, precisión y exhaustividad.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6">Conclusiones y trabajo futuro</head><p>El marco de evaluación de TASS es provechoso para los grupos que inician la investigación en análisis de sentimiento en español con el fin de extenderla a otras latitudes. En nuestro caso pudimos evaluar y comparar la calidad de los resultados de los primeros casos base de nuestro trabajo. Observamos los primeros resultados con un sistema que utiliza un método de normalización con identificación de potenciales marcadores de énfasis, un modelo de representación basado en vectores de baja dimensión, y modelos de representación del texto con características locales y globales. El trabajo además hace uso de características comunes con otros como los son el uso de emoticones y partículas negativas. Como trabajo futuro tenemos pendiente la evaluación usando 3 categorías de los datos que hacen uso de contexto local con bigramas y características adicionales como uso de emoticones, palabras positivas con énfasis, y partículas de negación. Esperamos que los mejores resultados sean obtenidos al incorporar los nuevos modelos de lenguaje que estamos calculando para bigramas y skipgramas previos al unirlo con nuestro método de representación en vectores de baja dimensión. Se desea estudiar el efecto de la reducción del tamaño del vector al igual que técnicas de extrapolación de la polaridad en los modelos para los términos que no aparecen en los datos de entrenamiento. LABDA at the 2016 TASS challenge task: using word embeddings for the sentiment analysis task *</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>Knowing the opinion of customers or users has become a priority for companies and organizations in order to improve the quality of their services and products. With the ongoing explosion of social media, it affords a significant opportunity to poll the opinion of many Internet users by processing their comments. However, it should be noted that sentiment analysis, which can be defined as the automatic analysis of opinion in texts <ref type="bibr">(Pang and Lee, 2008)</ref>, is a challenging task because it is not strange that different people assign different polarities to a given text. On Twitter, the task is even more difficult, because the texts are small (only 140 characters) and are characterized by their informal style language, many grammatical errors and spelling mistakes, slang and vulgar vocabulary and abbreviations.</p><p>Since their introduction in 2013, the TASS shared task editions have had as main goal to promote the development of methods and resources for sentiment analysis of tweets in Spanish. This paper describes the participation of the LABDA group at the Task 1 (Sentiment Analysis at global level). In this task, the participating systems have to determine the global polarity of each tweet in the test dataset. There are two different evaluations: one based on 6 different polarity labels (P+, P, NEU, N, N+, NONE) and another based on just 4 labels (P, N, NEU, NONE). A detailed description of the task can be found in the overview paper <ref type="bibr" target="#b46">of TASS 2016</ref><ref type="bibr">(García-Cumbreras et al., 2016)</ref>. Our approach exploits word embedding representations for tweets and machine learning algorithms such as SVM and logistic regression. The word embedding model can yield significant dimensionality reduction compared to the classical Bag-Of-Word (BoW) model. 
The dimensionality reduction can have several positive effects on our algorithms such as faster training, avoiding overfitting and better performance.</p><p>The paper is organized as follows. Section 2 describes our approach. The experimental results are presented and discussed in Section 3. We conclude in Section 4 with a summary of our findings and some directions for future work.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">System</head><p>In this paper, we study the use of word embeddings (also known as word vectors) in order to represent tweets and then examine several machine learning algorithms to classify them. Word embeddings have shown promising results in NLP tasks, such as named entity recognition (Segura-Bedmar, Suárez-Paniagua, and Martínez, 2015), relation extraction <ref type="bibr">(Alam et al., 2016)</ref>, sentiment analysis <ref type="bibr" target="#b12">(Socher et al., 2013b)</ref> or parsing <ref type="bibr" target="#b11">(Socher et al., 2013a)</ref>. A word embedding is a function to map words to low dimensional vectors, which are learned from a large collection of texts. At present, Neural Network is one of the most used learning techniques for generating word embeddings <ref type="bibr">(Mikolov and Dean, 2013)</ref>. The essential assumption of this model is that semantically close words will have similar vectors (in terms of cosine similarity). Word embeddings can help to capture semantic and syntactic relationships of the corresponding words.</p><p>While the well-known Bag-of-Words (BoW) model involves a very large number of features (as many as the number of non-stopword words with at least a minimum number of occurrences in the training data), the word embedding representation allows a significant reduction in the feature set size (in our case, from millions to just 300). The dimensionality reduction is a desirable goal, because it helps in avoiding overfitting and leads to a reduction of the training and classification times, without any performance loss.</p><p>As a preprocessing step, tweets must be cleaned. First, we remove all links and urls. We then remove usernames which can be easily recognized because their first character is the symbol @. We then transform the hashtags to words by removing their first character (that is, the symbol #). 
Taking advantage of regular expressions, the emoticons are detected and classified in order to count the number of positive and negative emoticons in each tweet and then we remove them from the text. Table <ref type="table" target="#tab_0">1</ref> shows the list of positive and negative emoticons, which were taken from the wikipedia page https://en.wikipedia.org/wiki/List\_of\_emoticons. We convert the tweets to lowercase and replace misspelled accented letters with the correct one (for instance "à" with "á"). We also treat elongations (that is, the repetition of a character) by removing the repetition of a character after its second occurrence (for example, "hoooolaaaa" would be translated to "hola"). We then decided to take into account laughs (for instance "jajaja") which turned out to be challenging because of the diverse ways they are expressed (i.e. expressions like "jajajaja" or "jejeje" and even misspelled ones like "jajjajaaj"). We addressed this using regular expressions to standardize the different forms (i.e. "jajjjaaj" to "jajaja") and then replace them with the word "risas". Finally we remove all non-letter characters and all stopwords present in tweets<ref type="foot" target="#foot_1">1</ref> . Table <ref type="table" target="#tab_0">1</ref>: List of positive and negative emoticons</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Orientation Emoticons</head><p>Once the tweets are preprocessed, they are tokenized using the NLTK toolkit (a Python package for NLP); we also performed experimentation by lemmatizing each tweet using MeaningCloud<ref type="foot" target="#foot_2">2</ref> Text Analytic software to compare both approaches. Then, for each token, we search its vector in the word embedding model. We use a pretrained model <ref type="bibr" target="#b4">(Cardellino, 2016)</ref>, which was generated by using the word2vec algorithm <ref type="bibr">(Mikolov and Dean, 2013)</ref> from a collection of Spanish texts with approximately 1.5 billion words. The dimension of the word embedding is 300. It should be noted that these texts were taken from different resources such as Spanish Wikipedia, WikiSource and Wikibooks, but none of them contains tweets. Therefore, it is possible that the main characteristics of the social media texts (such as informal style language, noisy, plenty of grammatical errors and spelling mistakes, slang and vulgar vocabulary, abbreviations, etc) are not correctly represented in this model. One of the main problems is that there is a significant number of words (almost 13 % of the vocabulary, representing 6 % of word occurrences) that are not found in the model. 
We perform a review of a small sample of these words, showing that most of them were mainly hashtags.</p><p>In our approach, a tweet of n tokens (T = w_1, w_2, ..., w_n) is represented as the centroid of the word vectors w_i of its tokens, as shown in the following equation:</p><formula xml:id="formula_2">T = \frac{1}{n}\sum_{i=1}^{n} w_i = \frac{\sum_{j=1}^{N} w_j \cdot TF(w_j, t)}{\sum_{j=1}^{N} TF(w_j, t)}<label>(1)</label></formula><p>where N is the vocabulary size, that is, the total number of distinct words, while TF(w_j, t) refers to the number of occurrences of the j-th vocabulary word in the tweet T.</p><p>We also explore the effect of including the inverse document frequencies IDF to represent tweets (see Equation <ref type="formula">2</ref>). This helps to increase the weight of words that occur often, but only in a few documents, while it reduces the relevance of words that occur very frequently in a larger number of texts.</p><formula xml:id="formula_3">T = \frac{1}{n}\sum_{i=1}^{n} w_i = \frac{\sum_{j=1}^{N} w_j \cdot TF(w_j, t) \cdot IDF(w_j)}{\sum_{j=1}^{N} TF(w_j, t) \cdot IDF(w_j)}<label>(2)</label></formula><p>having IDF(w_j) = \log\frac{|D|}{|\{tw \in D : w_j \in tw\}|} where |D| refers to the number of tweets.</p><p>In addition to using the centroid, we assess the impact of complementing the tweet model with the following additional features: posWords: number of positive words present in the tweet. negWords: number of negative words present in the tweet. posEmo: number of positive emoticons present in the tweet. negEmo: number of negative emoticons present in the tweet.</p><p>For the posWords and negWords features we used the iSOL lexicon <ref type="bibr">(Molina-González et al., 2013)</ref>, a list composed of 2,509 positive words and 5,626 negative words. 
As described before, for the emoticons we used the listed in Table <ref type="table" target="#tab_0">1</ref>, but also added to the positive ones the number of laughs detected; and also, we included the number of recommendations present in the form of a "Follow Friday" hashtag (#FF), due to its ease of detection and its positive bias.</p><p>Classification is performed using scikitlearn, a Python module for machine learning. This package provides many algorithms such as Random Forest, Support Vector Machine (SVM) and so on. One of its main advantages is that it is supported by extensive documentation. Moreover, it is robust, fast and easy to use.</p><p>As stated before, we have two main training models: Averaged centroids and the averaged centroids including the inverted document frequency, for both the lemmatized and not-lemmatized texts. We performed experiments using three different classifiers: Random Forests, Support Vector Machines and Logistic Regression because these classifiers often achieved the best results for text classification and sentiment analysis.</p><p>Also we evaluated the impact of applying a set of emoticon's rules as a pre-classification stage, similar to <ref type="bibr" target="#b5">(Chikersal et al., 2015)</ref>, in which we determine a first stage polarity for each tweet as follows:</p><p>If posEmo is greater than zero and negEmo is equal to zero, the tweet is marked as "P".</p><p>If negEmo is greater than zero and posEmo is equal to zero, the tweet is marked as "N".</p><p>If both posEmo and negEmo are greater than zero, the tweet is marked as "NEU".</p><p>If both posEmo and negEmo are equal to zero, the tweet is marked as "NONE".</p><p>Then, after the classification takes place we made three tests: i) Applying no rule, ii) honoring the polarity defined by the rule, which means, we keep the predefined polarity if the tweet was marked as "P" or "N", otherwise we take the value estimated by the classifier, and iii) a mixed approach where we 
give each polarity a value (N+: -2; N: -1; NEU,NONE: 0; P: 1; P+: 2) and performed an arithmetic sum of both the predefined and estimated polarity if and only if they are not equal; with that for instance, if the classifier marked a tweet as "N" and the rules marked it as "P" the tweet will be classified as "NEU".</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Results</head><p>In order to choose the best-performing classifiers, we use 10-fold cross-validation because there is no development dataset and this strategy has become the standard method in practical terms. Our experiments showed that, although the results were similar<ref type="foot" target="#foot_3">3</ref> , the best settings for the 5-levels task are: RUN-1: Support Vector Machine, over the averaged centroids without applying any rules for pre-defining polarities.</p><p>RUN-2: Support Vector Machine, over the averaged centroids and applying the mixed rules approach. RUN-3: Logistic Regression, over the centroids with inverse document frequency and applying the mixed rules approach.</p><p>and for the 3-levels task are: RUN-1: Support Vector Machine, over the averaged centroids and applying the mixed rules approach. RUN-2: Logistic Regression, over the centroids with inverse document frequency and applying the mixed rules approach.</p><p>RUN-3: Logistic Regression, over the averaged centroids and applying the mixed rules approach.</p><p>Tables <ref type="table" target="#tab_25">2 and 3</ref> show the results for these settings provided by the TASS submission system. For each run, accuracy is provided as well as the macro-averaged precision, recall and F1-measure. As expected, the results for 3 levels are higher than for 5 levels because the training dataset is larger. With the settings mentioned above, the obtained results are extremely similar, but we can state that, in terms of Accuracy, Logistic Regression reports the best results; and, even though it is not measured in this work, it is worth mentioning that Logistic Regression's performance was observably faster.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Run</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Conclusions and future work</head><p>This paper explores the use of word embeddings for the task of sentiment analysis. Instead of using the bag-of-words model to represent tweets, these are represented as word vectors taken from a pre-trained model of word embeddings. An important advantage of the word embedding model compared to the technique of bag-of-words representation is that it achieves a significant dimensional reduction of the feature set needed to represent tweets and leads, therefore, to a reduction of training and testing time of the algorithms.</p><p>In order to use word embedding models properly, a preprocessing stage had to be completed before training a classifier. Due to the unstructured nature of the tweets, this preprocessing proved to be a very important step in order to standardize to some degree the input data. The experimentation showed that the three tested classifiers obtained very similar results, with Random Forest having slightly worse performance and Logistic Regression being slightly better and much faster.</p><p>One of the main drawbacks of our approach is that many words do not have a word vector in the word embedding model used for our experiments. An analysis showed that many of these words come from hashtags, which are usually short phrases. Therefore, we should apply a more sophisticated method in order to extract the words forming a hashtag.</p><p>As future work, we also plan to use a word embedding model trained on a collection of text from Spanish social media. We think that this will have a positive effect on the performance of our system to identify the polarity of tweets because this model will be generated from documents characterized by the main features that describe social media texts (for example, informal style language, plenty of grammatical errors and spelling mistakes, slang and vulgar vocabulary).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>JACERONG at TASS 2016: An Ensemble Classifier for 1 Introduction</head><p>What people say on social media about issues of their everyday life, the society, and the world in general, has turned into a rich source of information to understand social behavior. Twitter content, in particular, has caught the attention of researchers who have investigated its potential for conducting studies on the human subjectivity at large scale, which was not feasible using tradi-tional methods. Around election time, sentiment analysis of political tweets has been widely used to capture trends in public opinion regarding important issues such as voting intention (Gayo-Avello, 2013). However, analyzing this content also presents several challenges, including the development of text analysis approaches based on Natural Language Processing techniques, which properly adapt to the informal genre and the free writ-ing style of Twitter <ref type="bibr" target="#b15">(Han and Baldwin, 2011;</ref><ref type="bibr">Cerón-Guzmán and León-Guzmán, 2016)</ref>.</p><p>TASS is a workshop aimed at fostering research on sentiment analysis of Spanish Twitter data, which provides a benchmark evaluation to compare the latest advances in the field <ref type="bibr">(García-Cumbreras et al., 2016)</ref>. One of the proposed tasks is to determine the opinion orientation expressed in tweets at global level. Task 1 consists on assigning one of six labels (P+, P, NEU, N, N+, NONE) to a tweet in the six-labels evaluation; or one of four labels (P, NEU, N, NONE) in the four-labels evaluation. Here, P, N, and NEU, stand for positive, negative, and neutral, respectively; NONE, instead, means no sentiment. The "+" symbol is used as intensifier.</p><p>This paper presents an ensemble-based approach to polarity classification of Spanish tweets, developed to participate in Task 1 proposed by the organizing committee of the TASS workshop. 
The ensemble members are (relatively) highly correct classifiers with the lowest absolute correlation with each other. The output from each classifier, which may be either a class label or probabilities for each class, is used to assign the polarity to a tweet based on a majority rule or on the highest unweighted average probability. Moreover, classifiers are adapted to deal with non-standard lexical forms in tweets, in order to improve the quality of natural language analysis.</p><p>The remainder of this paper is organized as follows. Section 2 describes the common architecture of the ensemble members (i.e., classifiers). Next, the submitted experiments, as well as the obtained results, are discussed in Section 3. Finally, Section 4 concludes the paper.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">The System Architecture</head><p>The tweet text is passed through the pipeline of each system in order to assign it a class label or a probability to be of a certain class. The pipeline, which goes from text preprocessing to machine learning classification, is described below. Note that the system term is preferred over the classifier term, because a machine learning classifier receives a feature vector and produces a class label or probabilities for each class; instead, the system term enables to conceive the whole process, from preprocessing to machine learning classification.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1">Preprocessing</head><p>The process of text cleaning and normalization is performed in two phases: basic preprocessing and advanced preprocessing.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.1">Basic Preprocessing</head><p>The following simple rules are implemented as regular expressions:</p><p>• Removing URLs and emails.</p><p>• HTML entities are mapped to textual representations (e.g., "&amp;lt;" → "&lt;").</p><p>• Specific Twitter terms such as mentions (@user) and hashtags (#topic) are replaced by placeholders.</p><p>• Unknown characters are mapped to their closest ASCII variant, using the Python Unidecode module for the mapping.</p><p>• Consecutive repetitions of a same character are reduced to one occurrence.</p><p>• Emoticons are recognized and then classified into positive and negative, according to the sentiment they convey (e.g., ":)" → "EMO POS", ":(" → "EMO NEG").</p><p>• Unification of punctuation marks (Vilares, Alonso, and Gómez-Rodrıguez, 2014).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.2">Advanced Preprocessing</head><p>Once the set of simple rules has been applied, the tweet text is tokenized and morphologically analyzed by FreeLing <ref type="bibr">(Padró and Stanilovsky, 2012)</ref>. In this way, for each resulting token, its lemma and Part-of-Speech (POS) tag are assigned. Taking these data as input, the following advanced preprocessing is applied:</p><p>• Lexical normalization. Each token is passed through a set of basic modules of FreeLing (e.g., dictionary lookup, suffixes check, detection of numbers and dates, and named entity recognition) for identifying standard word forms and other valid constructions. If a token is not recognized by any of the modules, it is marked as out-of-vocabulary (OOV) word. Then, a confusion set is formed by normalization candidates which are identical or similar to the graphemes or phonemes that make the OOV word. These candidates are elements of the union of a dictionary of Spanish standard word forms and a gazetteer of proper nouns. The best normalization candidate for the OOV word is which best fits a statistical language model. The language model was estimated from the Spanish Wikipedia corpus. Lastly, the selected candidate is capitalized according to the capitalization rules of the Spanish language. Extensive research on lexical normalization of Spanish tweets can be read in (Cerón-Guzmán and León-Guzmán, 2016).</p><p>• Negation handling. Inspired by the approach proposed by Pang et al. <ref type="bibr" target="#b19">(Pang, Lee, and Vaithyanathan, 2002)</ref>, this research defined a negated context as a segment of the tweet that starts with a (Spanish) negation word and ends with a punctuation mark (i.e., "!", ",", ":", "?", ".", ";"), but only the first n [0, 3] or all tokens labeled with any or a specific POS tag (i.e., verb, adjective, adverb, and common noun) are affected by adding it the " NEG" suffix. 
Note that when n = 0, no token is affected.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2">Feature Extraction</head><p>In this stage, the normalized tweet text is transformed into a feature vector that feeds the machine learning classifier. The features are grouped into basic features and n-gram features.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.1">Basic Features</head><p>Some of these features are computed before the process of text cleaning and normalization is performed.</p><p>• The number of words completely in uppercase.</p><p>• The number of words with more than two consecutive repetitions of a same character.</p><p>• The number of consecutive repetitions of exclamation marks, question marks, and both punctuation marks (e.g., "!!", "??", "?!") and whether the text ends with an exclamation or question mark.</p><p>• The number of occurrences of each class of emoticons (i.e., positive and negative) and whether the last token of the tweet is an emoticon.</p><p>• The number of positive and negative words, relative to the ElhPolar lexicon <ref type="bibr">(Saralegi and Vicente, 2013)</ref>, the AFINN lexicon <ref type="bibr" target="#b17">(Nielsen, 2011)</ref>, or an union of both lexicons. In a negated context, the label of a polarity word is inverted (i.e., positive words become negative words, and vice versa). Additionally, a third feature labels the tweet with the class whose number of polarity words in the text is the highest.</p><p>• The number of negated contexts.</p><p>• The number of occurrences of each Partof-Speech tag.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.2">N-gram Features</head><p>The fixed-length set of basic features is always extracted from tweets. However, the tweet text varies from another in terms of length, number of tokens, and vocabulary used. For that reason, a process that transforms textual data into numerical feature vectors of fixed length is required. This process, known as vectorization, is performed by applying the tf-idf weighting scheme <ref type="bibr">(Manning, Raghavan, and Schütze, 2008)</ref>. Thus, each document (i.e., a tweet text) is represented as a vector d = {t 1 , . . . , t n } R V , where V is the size of the vocabulary that was built by considering word n-grams with n [1, 4], or character n-grams with n [3, 5] in the collection (i.e., the training set). The vector is, hence, formed by word n-grams, character n-grams, or a concatenation of word and character n-grams.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.3">Machine Learning Classification</head><p>At the last stage, the sentiment analysis system classifies a given tweet as either P+, P, NEU, N, N+, or NONE, or assigns probabilities for each class. After receiving as input the feature vector, an L2-regularized Logistic Regression classifier assigns a class label to the tweet or a probability to be of a certain class. The classifier was trained on the training set, using the Scikit-learn <ref type="bibr">(Pedregosa et al., 2011)</ref> implementation of the Logistic Regression algorithm.</p><p>3 Experiments 1,720 different sentiment analysis systems were trained on the training set via 5-fold cross-validation, in order to find the best parameter settings, namely: negation handling, polarity lexicon, order of word and character n-grams, and other parameters related to the vectorization process (e.g., lowercasing, frequency thresholds, etc.). The systems were sorted by their mean cross-validation score, and thus the top 50 ranked were filtered to build the ensemble. The training set is a collection of 7,219 tweets, each of which is tagged with one of six labels (i.e., P+, P, NEU, N, N+, and NONE). Note that the systems were trained for the six-labels evaluation, and therefore the P+ and P labels were merged into P, as well as the N+ and N labels were merged into N, to produce an output in accordance with the four-labels evaluation. Further description of the provided corpus, as well as of the training and test sets, can be read in <ref type="bibr">(García-Cumbreras et al., 2016)</ref>.</p><p>Next, the top 50 systems assigned a class label to each tweet in a collection of 1,000 tweets, which was drawn from the untagged test set with a similar class distribution to the training set. At this stage, the objective was to find the systems with the lowest absolute correlation with each other; therefore, the performance was not evaluated. 
Then, the less-correlated combinations of 5, 10, and 25 systems, were used to build the ensembles, whose outputs correspond to the submitted experiments. These experiments are described below:</p><p>• run-1: the less-correlated combination of 5 systems, which chooses the class label that represents the majority in the predictions made by the ensemble members.</p><p>• run-2: the less-correlated combination of 10 systems, which chooses the class with the highest unweighted average probability.</p><p>• run-3: the less-correlated combination of 25 systems, which chooses the class with the highest unweighted average probability.</p><p>Tables <ref type="table" target="#tab_25">1 and 2</ref> show the performance evaluation on the test set (i.e., a collection of 60,798 tweets) for six and four labels, respectively. Accuracy has been defined as the official metric for ranking the systems. In summary, the main gain occurs among the "run-1" and "run-2" experiments, with an increment of 0.5% in accuracy in the six-labels As a final point, Table <ref type="table" target="#tab_3">3</ref> shows how the overall performance is affected by the low discriminative power of the ensembles (in this case, the one that correspond to "run-3") for the NEU class. With this in mind, it is proposed as future work to deal with the low representativeness of the NEU class in the training data (i.e., 9.28% of tweets), in order to properly characterize this kind of tweets.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Conclusion</head><p>This paper has described an ensemble-based approach for sentiment analysis of Spanish Twitter data at global level, developed in order to participate in Task 1 proposed by the organization of TASS workshop. Three ensembles were built on the combination of sentiment analysis systems with the lowest absolute correlation with each other. The systems were adapted to the informal genre and the free writing style that characterize Twitter, in order to improve the quality of natural language analysis. In this way, the predicted class label for a particular tweet was based on a majority rule or on the highest average probability. Experimental results showed that the less-correlated combination of 25 systems, which chose the class with the highest unweighted average probability, was the setting that best suited to the task. However, there is a great room for improvement in the learning of a proper characterization of neutral tweets.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Participación de SINAI en TASS 2016 *</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>SINAI participation in TASS 2016</head><p>A. Montejo-Ráez University of Jaén 23071 Jaén (Spain) amontejo@ujaen.es</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>M.C. Díaz-Galiano</head><p>University of Jaén 23071 Jaén (Spain) mcdiaz@ujaen.es</p><p>Resumen: Este artículo describe el sistema de clasificación de la polaridad utilizado por el equipo SINAI en la tarea 1 del taller TASS 2016. Como en participaciones anteriores, nuestro sistema se basa en un método supervisado con SVM a partir de vectores de palabras. Dichos vectores se calculan utilizando la técnicas de deeplearning Word2Vec, usando modelos generados a partir de una colección de tweets expresamente generada para esta tarea y el volcado de la Wikipedia en español. Nuestros experimentos muestran que el uso de colecciones de datos masivos de Twitter pueden ayudar a mejorar sensiblemente el rendimiento del clasificador.</p><p>Palabras clave: Análisis de sentimientos, clasificación de la polaridad, deeplearning, Word2Vec</p><p>Abstract: This paper introduces the polarity classification system used by the SI-NAI team for the task 1 at the TASS 2016 workshop. Our approach is based on a supervised learning algorithm over vectors resulting from a weighted vector. This vector is computed using a deep-learning algorithm called Word2Vec. The algorithm is applied so as to generate a word vector from a deep neural net trained over a specific tweets collection and the Spanish Wikipedia. Our experiments show massive data from Twitter can lead to a slight improvement in classificaciones accuracy. Keywords: Sentiment analysis, polarity classification, deep learning, Word2Vec, Doc2Vec</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introducción</head><p>En este trabajo describimos las aportaciones realizadas para participar en la tarea 1 del taller TASS (Sentiment Analysis at global level), en su edición de 2016 <ref type="bibr">(García-Cumbreras et al., 2016)</ref>. Nuestra solución continúa con las técnicas aplicadas en el TASS 2014 (Montejo-Ráez, García-Cumbreras, y Díaz-Galiano, 2014) y 2015 (Díaz-Galiano y Montejo-Ráez, 2015), utilizando aprendizaje profundo para representar el texto y una colección de entrenamiento creada con tweets que contienen emoticonos que expresan emociones de felicidad o tristeza. Para ello utilizamos el método Word2Vec, ya que ha obtenido los mejores resultados en años anteriores. Por lo tanto, generamos un vector de pesos para cada palabra del tweet utilizando Word2Vec, y realizamos la media * Este estudio está parcialmente financiado por el proyecto TIN2015-65136-C2-1-R otorgado por el Ministerio de Economía y Competitividad del Gobierno de España.</p><p>de dichos vectores para obtener una única representación vectorial. Nuestros resultados demuestran que el rendimiento del sistema de clasificación puede verse sensiblemente mejorado gracias a la introducción de estos datos en la generación del modelo de palabras, no así en el entrenamiento del clasificador de polaridad final.</p><p>La tarea del TASS en 2016 denominada Sentiment Analysis at global level consiste en el desarrollo y evaluación de sistemas que determinan la polaridad global de cada tweet del corpus general. Los sistemas presentados deben predecir la polaridad de cada tweet utilizando 6 o 4 etiquetas de clase (granularidad fina y gruesa respectivamente).</p><p>El resto del artículo está organizado de la siguiente forma. El apartado 2 describe el estado del arte de los sistemas de clasificación de polaridad en español. A continuación, se describe la colección de tweets con emoticonos utilizada para entrenar el clasificador. 
En el apartado 4 se describe el sistema desarro-llado y en el apartado 5 los experimentos realizados, los resultados obtenidos y el análisis de los mismos. Finalmente, en el último apartado exponemos las conclusiones y el trabajo futuro.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Clasificación de la polaridad en español</head><p>La mayor parte de los sistemas de clasificación de polaridad están centrados en textos en inglés, y para textos en español el sistema más completo, en cuanto a técnicas lingüísticas aplicadas, posiblemente sea The Spanish SO Calculator <ref type="bibr" target="#b24">(Brooke, Tofiloski, y Taboada, 2009)</ref>, que además de resolver la polaridad de los componentes clásicos (adjetivos, sustantivos, verbos y adverbios) trabaja con modificadores como la detección de negación o los intensificadores.</p><p>Los algoritmos de aprendizaje profundo (deep-learning en inglés) están dando buenos resultados en tareas donde el estado del arte parecía haberse estancado <ref type="bibr" target="#b48">(Bengio, 2009)</ref>. Estas técnicas también son de aplicación en el procesamiento del lenguaje natural (Collobert y Weston, 2008), e incluso ya existen sistemas orientados al análisis de sentimientos, como el de Socher et al. <ref type="bibr" target="#b36">(Socher et al., 2011)</ref>. Los algoritmos de aprendizaje automático no son nuevos, pero sí están resurgiendo gracias a una mejora de las técnicas y la disposición de grandes volúmenes de datos necesarios para su entrenamiento efectivo.</p><p>En la edición de TASS en 2012 el equipo que obtuvo mejores resultados (Saralegi Urizar y San Vicente Roncal, 2012) presentaron un sistema completo de pre-procesamiento de los tweets y aplicaron un lexicón derivado del inglés para polarizar los tweets. Sus resultados eran robustos en granularidad fina (65 % de accuracy) y gruesa (71 % de accuracy).</p><p>En la edición de TASS en 2013 el mejor equipo <ref type="bibr" target="#b27">(Fernández et al., 2013)</ref> tuvo todos sus experimentos en el top 10 de los resultados, y la combinación de ellos alcanzó la primera posición. 
Presentaron un sistema con dos variantes: una versión modificada del algoritmo de ranking (RA-SR) utilizando bigramas, y una nueva propuesta basada en skipgrams. Con estas dos variantes crearon lexicones sobre sentimientos, y los utilizaron junto con aprendizaje automático (SVM) para detectar la polaridad de los tweets.</p><p>En 2014 el equipo con mejores resultados en TASS se denominaba ELiRF-UPV (Hur-tado y <ref type="bibr">Pla, 2014)</ref>. Abordaron la tarea como un problema de clasificación, utilizando SVM. Utilizaron una estrategia uno-contratodos donde entrenan un sistema binario para cada polaridad. Los tweets fueron tokeninizados para utilizar las palabras o los lemas como características y el valor de cada característica era su coeficiente tf-idf. Posteriormente realizaron una validación cruzada para determinar el mejor conjunto de características y parámetros a utilizar.</p><p>El equipo ELiRF-UPV <ref type="bibr">(Hurtado, Pla, y Buscaldi, 2015)</ref> volvió a obtener los mejores resultados en la edición de TASS 2015 con una técnica muy similar a la edición anterior (SVM, tokenización, clasificadores binarios y coeficientes tf-idf). En este caso utilizaron un sistema de votación simple entre un mayor número de clasificadores con parámetros distintos. Los mejores resultados los obtuvieron con un sistema que combinaba 192 sistemas SVM con configuraciones diferentes, utilizando un nuevo sistema SVM para realizar dicha combinación.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">Colección de tweets con emoticonos</head><p>Los algoritmos de deep-learning necesitan grandes volúmenes de datos para su entrenamiento. Por ese motivo se ha creado una colección de tweets específica para la detección de polaridad. Para crear dicha colección se han recuperado tweets con las siguientes características:</p><p>Que contengan emoticonos que expresen la polaridad del tweet. En este caso se han utilizado los siguientes emoticonos:</p><p>• Positivos: :) :-) :D :-D</p><p>• Negativos: :( :-(</p><p>Que los tweets no contengan URLs, para evitar tweets cuyo contenido principal se encuentra en el enlace.</p><p>Que no sean retweets, para reducir el número de tweets repetidos.</p><p>La captura de dichos tweets se realizó durante 22 días, del 18/07/2016 hasta el 9/08/2016, recuperando unos 100.000 tweets diarios aproximadamente. Tal y como se ve en la Figura 1 la recuperación fue muy homogénea y se obtuvieron más de 2.000.000 de tweets. Eliminar menciones (nombres de usuario que empiezan el caracter @).</p><p>Sustituir letras acentuadas por sus versiones sin acentuar.</p><p>Quitar las palabras vacías de contenido (stopwords).</p><p>Normalizar las palabras para que no contengan letras repetidas, sustituyendo las repeticiones de letras contiguas para dejar sólo 3 repeticiones.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Descripción del sistema</head><p>Word2Vec 1 es una implementación de la arquitectura de representación de las palabras mediante vectores en el espacio continuo, basada en bolsas de palabras o n-gramas concebida por Tomas Mikolov et al. <ref type="bibr">(Mikolov et al., 2013)</ref>. Su capacidad para capturar la semántica de las palabras queda comprobada en su aplicabilidad a problemas como la analogía entre términos o el agrupamiento de palabras. El método consiste en proyectar las palabras a un espacio n-dimensional, cuyos pesos se determinan a partir de una estructura de red neuronal mediante un algoritmo recurrente. El modelo se puede configurar para que utilice una topología de bolsa de palabras (CBOW) o skip-gram, muy similar al 1 https://code.google.com/p/word2vec/ anterior, pero en la que se intenta predecir los términos acompañantes a partir de un término dado. Con estas topologías, si disponemos de un volumen de textos suficiente, esta representación puede llegar a capturar la semántica de cada palabra. El número de dimensiones (longitud de los vectores de cada palabra) puede elegirse libremente. Para el cálculo del modelo Word2Vec hemos recurrido al software indicado, creado por los propios autores del método.</p><p>Tal y como se ha indicado, para obtener los vectores Word2Vec representativos para cada palabra tenemos que generar un modelo a partir de un volumen de texto grande. Para ello hemos utilizado los parámetros que mejores resultados obtuvieron en nuestra participación del 2014 (Montejo-Ráez, García-Cumbreras, y Díaz-Galiano, 2014). Por lo tanto, a partir de un volcado de Wikipedia<ref type="foot" target="#foot_4">2</ref> en Español de los artículos en XML, hemos extraído el texto de los mismos. 
Obtenemos así unos 2,2 GB de texto plano que alimenta al programa word2vec con los parámetros siguientes: una ventana de 5 términos, el modelo skip-gram y un número de dimensiones esperado de 300, logrando un modelo con más de 1,2 millones de palabras en su vocabulario.</p><p>Como puede verse en la Figura 2, nuestro sistema realiza la clasificación de los tweets utilizando dos fases de aprendizaje, una en la que entrenamos el modelo Word2Vec haciendo uso de un volcado de la enciclopedia on-line Wikipedia, en su versión en español, como hemos indicado anteriormente. De esta forma representamos cada tweet con el vector resultado de calcular la media de los vectores Word2Vec de cada palabra en el tweet y su desviación típica (por lo que cada vector de palabras por modelo es de 600 dimensiones). Se lleva a cabo una simple normalización previa sobre el tweet, eliminando repetición de letras y poniendo todo a minúsculas. La segunda fase de entrenamiento utiliza el algoritmo SVM y se entrena con la colección de tweets con emoticonos explicada en el apartado 3. La implementación de SVM utilizada es la basada en kernel lineal con entrenamiento SGD (Stochastic Gradient Descent) proporcionada por la biblioteca Sci-kit Learn<ref type="foot" target="#foot_5">3</ref>  <ref type="bibr">(Pedregosa et al., 2011)</ref>.</p><p>Esta solución es la utilizada en las dos variantes de la tarea 1 del TASS con predicción de 4 clases: la que utiliza el corpus de tweets completo (full test corpus) y el que utiliza el corpus balanceado (1k test corpus).</p><p>Figura 2: Flujo de datos del sistema completo 5 Resultados obtenidos Hemos experimentado con el efecto que tienen en el rendimiento del sistema el uso de una colección de datos generada a partir de la captura de tweets y que han sido etiquetados según los emoticonos que contienen en la forma comentada anteriormente. 
La colección de más de 1,7 millones de tweets ha sido utilizada al completo para generar un modelo de vectores de palabras, cuya combinación con el de Wikipedia se ha analizado. También hemos comprobado cómo el uso de dicha colección de tweets afecta cuando se usa para el entrenamiento del modelo de clasificación de la polaridad. Para ello se han seleccionado 500,000 tweets aleatoriamente de esta colección, con sus correspondientes etiquetas P (positivo) o N (negativo) y se han combinado con la colección de entrenamiento <ref type="bibr">de TASS.</ref> Los resultados según las medidas de Accuracy y Macro F1 obtenidas se muestran en la tabla 1. La primera columna nos indica a partir de cuáles datos se han generado los modelos de vectores de palabras, bien sólo con Wikipedia (W) o como combinación de ésta con los tweets del corpus construido (W+T). La segunda columna indica cómo se ha entrenado el clasificador de polaridad a partir de los textos etiquetados vectorizados con los modelos generados en el paso previo, bien sólo usando los datos de entrenamiento proporcionados por la organización <ref type="bibr">(TASS)</ref> o incorporando los etiquetados a partir de emoticonos (TASS+T).</p><p>Como podemos observar, el uso de una colección de tweets para ampliar la capacidad de representar un modelo basado en vectores de palabras mejora sensiblemente al ge- Esto nos lleva a plantearnos la pregunta de qué ocurriría si utilizáramos sólo los tweets recopilados para generar un modelo de vectores de palabras. Los resultados que se obtienen son un 59,05 % de ajuste y un 44,43 % de F1. No cabe duda de que conviene explorar el uso de modelos de generación de características a partir de vectores de palabras.</p><p>Estos resultados mejoran nuestros datos del año pasado, en los que obtuvimos un ajuste del 61,19 % combinando vectores de palabras (Word2Vec) y vectores de documentos (Doc2Vec).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6">Conclusiones y trabajo futuro</head><p>A partir de los resultados obtenidos, encontramos que resulta interesante la incorporación de texto no formal (tweets) para la generación de los modelos de palabras, lo cual tiene su sentido en una tarea de clasificación que, precisamente, trabaja sobre textos no formales que tienen la misma red social como fuente. En cambio, el considerar que los emoticonos en un tweet pueden ayudar a un clasificador como SVM a mejorar en la determinación de la polaridad ha resultado una hipótesis fallida. Esto puede entenderse echando un vistazo a algunos de los tweets capturados por el sistema, donde se evidencia la dificultad, incluso para una persona, de poner en contexto el sentido del tweet y su consideración como positivo o negativo si no disponemos de un emoticono asociado.</p><p>Como trabajo futuro nos proponemos diseñar una red neuronal profunda más elaborada, pero que parta también de textos de entrenamiento tanto formales como no formales, si bien teniendo en cuenta información lingüística más avanzada como la sintáctica, en lugar de trabajar con simples bolsas de palabras. También queremos explorar el uso de redes de este tipo en el proceso de clasificación y no en la generación de características. Una posibilidad es utilizar una red de tipo DBN (Deep Belief Network) (Hinton y Salakhutdinov, 2006) en la que se añade una última fase donde se realiza el etiquetado de los ejemplos.</p><p>ELiRF-UPV en TASS 2016: Análisis de Sentimientos en Twitter Resumen: En este trabajo se describe la participación del equipo del grupo de investigación ELiRF de la Universitat Politècnica de València en el Taller TASS2016. Este taller es un evento enmarcado dentro de la XXXII edición del Congreso Anual de la Sociedad Española para el Procesamiento del Lenguaje Natural. 
Este trabajo presenta las aproximaciones utilizadas para las dos tareas planteadas en el taller, los resultados obtenidos y una discusión de los mismos. Nuestra participación se ha centrado principalmente en explorar diferentes aproximaciones para combinar un conjunto de sistemas con lo que se han obtenido los mejores resultados en ambas tareas. Palabras clave: Twitter, Análisis de Sentimientos.</p><p>Abstract: This paper describes the participation of the ELiRF research group of the Universitat Politècnica de València at TASS2016 Workshop. This workshop is a satellite event of the XXXII edition of the Annual Conference of the Spanish Society for Natural Language Processing. This work describes the approaches used for the two tasks of the workshop, the results obtained and a discussion of these results. Our participation has focused primarily on exploring different approaches for combining a set of systems. Using these approaches we have achieved the best results in both tasks. Keywords: Twitter, Sentiment Analysis.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introducción</head><p>El Taller de Análisis de Sentimientos <ref type="bibr">(TASS)</ref> en sus cinco ediciones ha venido planteando tareas relacionadas con el análisis de sentimientos en Twitter. El objetivo principal es el de comparar y evaluar diferentes aproximaciones a estas tareas. Además, desarrolla recursos de libre acceso, básicamente, corpora anotados con polaridad, temática, tendencia política, aspectos, que son de gran utilidad para la comparación de diferentes aproximaciones a las tareas propuestas.</p><p>En esta quinta edición del TASS se proponen dos tareas de ediciones anteriores <ref type="bibr">(García-Cumbreras et al., 2016)</ref>: 1) Determinación de la polaridad en tweets, con diferentes grados de intensidad en la polaridad: 6 etiquetas y 4 etiquetas y 2) Determinación de la polaridad de los aspectos en el corpus STOMPOL. Este corpus consta de un con-junto de tweets sobre diferentes aspectos pertenecientes al dominio de la política.</p><p>El presente artículo resume la participación del equipo ELiRF-UPV de la Universitat Politècnica de València en todas las tareas planteadas en este taller. Primero se describen las aproximaciones y recursos utilizados en cada tarea. A continuación se presenta la evaluación experimental realizada y los resultados obtenidos. Finalmente se muestran las conclusiones y posibles trabajos futuros.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Descripción de los sistemas</head><p>Los sistemas presentados en el TASS 2016 se basan en el sistema desarrollado en la edicion anterior del TASS 2015 <ref type="bibr">(Hurtado, Pla, y Buscaldi, 2015)</ref>. Muchas de las características y recursos de este sistema fueron utilizados en las ediciones en las que nuestro equipo ha participado <ref type="bibr" target="#b42">(Pla y Hurtado, 2013</ref>) <ref type="bibr">(Hurtado y Pla, 2014)</ref> . El preproceso de los tweets utiliza la estrategia descrita en el trabajo TASS 2013 Hurtado, Esta consiste básicamente en la adaptación para el castellano del tokenizador de tweets Tweetmotif <ref type="bibr">(Connor, Krieger, y Ahn, 2010)</ref>. También se ha usado Freeling (Padró y Stanilovsky, 2012)<ref type="foot" target="#foot_7">1</ref> como lematizador, detector de entidades nombradas y etiquetador morfosintáctico, con las correspondientes modificaciones para el dominio de Twitter. Usando esta aproximación, la tokenización ha consistido en agrupar todas las fechas, los signos de puntuación, los números y las direcciones web. Se han conservado los hashtags y las menciones de usuario. Se ha considerado y evaluado el uso de palabras y lemas como tokens así como la detección de entidades nombradas.</p><p>Todas las tareas se han abordado como un problema de clasificación. Se han utilizado Máquinas de Soporte Vectorial (SVM) por su capacidad para manejar con éxito grandes cantidades de características. En concreto usamos dos librerías (LibSVM<ref type="foot" target="#foot_8">2</ref> y LibLinear<ref type="foot" target="#foot_9">3</ref> ) que han demostrado ser eficientes implementaciones de SVM que igualan el estado del arte. El software está desarrollado en Python y para acceder a las librerías de SVM se ha utilizado el toolkit scikit-learn<ref type="foot" target="#foot_10">4</ref> . 
<ref type="bibr">(Pedregosa et al., 2011)</ref>.</p><p>En este trabajo se ha explotado la técnica de combinación de diferentes configuraciones de clasificadores para aprovechar su complementariedad. Se ha utilizado la técnica de votación simple utilizada en trabajos anteriores <ref type="bibr" target="#b42">(Pla y Hurtado, 2013)</ref>  <ref type="bibr" target="#b44">(Pla y Hurtado, 2014b)</ref> pero en este caso extendiéndola a un número mayor de clasificadores, con diferentes parámetros y características (palabras, lemas, n-gramas de palabras y lemas) así como estrategias de combinación alternativas.</p><p>Cada tweet se ha representado como un vector que contiene los coeficientes tf-idf de las características consideradas. En toda la experimentación realizada, las características y los parámetros de los clasificadores se han elegido mediante una validación cruzada de 10 iteraciones (10-fold cross-validation) sobre el conjunto de entrenamiento.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Tarea 1: Análisis de sentimientos en tweets</head><p>Esta tarea consiste en determinar la polaridad de los tweets y la organización ha definido dos subtareas. La primera distingue seis etiquetas de polaridad: N y N+ que expresan polaridad negativa con diferente intensidad, P y P+ para la polaridad positiva con diferente intensidad, NEU para la polaridad neutra y NONE para expresar ausencia de polaridad. La segunda sólo distinguen 4 etiquetas de polaridad: N, P, NEU y NONE.</p><p>El corpus proporcionado por la organización del TASS consta de un conjunto de entrenamiento, compuesto por 7219 tweets etiquetados con la polaridad usando seis etiquetas, y un conjunto de test, de 60798 tweets, al cual se le debe asignar la polaridad. La distribución de tweets según su polaridad en el conjunto de entrenamiento se muestra en la Tabla 1. Tabla 1: Distribución de tweets en el conjunto de entrenamiento según su polaridad.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Polaridad</head><p>A partir de la tokenización propuesta se realizó un proceso de validación cruzada (10fold cross validation) para determinar el mejor conjunto de características y los parámetros del modelo. Como características se probaron diferentes tamaños de n-gramas de palabras y de lemas. También se exploró la combinación de los modelos mediante diferentes técnicas de votación para aprovechar su complementariedad y mejorar las prestaciones finales. Algunas de éstas técnicas proporcionaron mejoras significativas sobre el mismo conjunto de datos, como se muestra en <ref type="bibr" target="#b44">(Pla y Hurtado, 2014b)</ref>. En todos los casos se han utilizado diccionarios de polaridad, tanto de lemas (Saralegi y San Vicente, 2013), como de palabras <ref type="bibr" target="#b40">(Martínez-Cámara et al., 2013)</ref> y el diccionario Afinn <ref type="bibr">(Hansen et al., 2011)</ref> traducido automáticamente del inglés al castellano.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Se han considerado dos alternativas para abordar la</head><p>La primera combina mediante un sistema de votación ponderada la de 192 clasificadores basados en el uso de SVM. La diferencia entre los clasificadores radica en el preprocesado y la tokenización utilizada, las características seleccionadas y los valores de los parámetros del propio modelo SVM. En concreto se realizaron todas las combinaciones posibles entre 8 tokenizaciones (lemas o palabras, detectar NE o no, detectar menciones a usuarios y hashtags, ...); 4 conjuntos distinto de características (palabras o bigramas con y sin diccionarios de polaridad) y 6 valores distintos del parámetro c del modelo SVM con kernel lineal. La clase asignada a cada tweet t viene determinada por la siguiente fórmula.</p><formula xml:id="formula_4">ĉ = argmax c∈C (N t (c) • P (c))<label>(1)</label></formula><p>Donde C es el conjunto de todas las clases, N t (c) es el número de clasificadores que asignan la clase c al tweet t, y P (c) es la probabilidad a priori de la clase c calculada utilizando el corpus de entrenamiento.</p><p>run2 La segunda alternativa explora la combinación de modelos mediante el aprendizaje de un metaclasificador. Utilizando las salidas de los mismos 192 clasificadores que en el run anterior, se ha aprendido un segundo modelo SVM que sirve para proporcionar la nueva salida combinada. Se ha destinado una parte del corpus de entrenamiento para ajustar los parámetros del metamodelo. Esta aproximación es la misma que la utilizada en la edición del TASS 2015.</p><p>Para la subtarea de 4 etiquetas el run1 se ha aprendido utilizando el corpus de aprendizaje con 4 etiquetas mientras que el run2, dada la complejidad del ajuste de parámetros del metamodelo se ha optado por adaptar el resultado de la subtarea de 6 etiquetas uniendo P y P+ como P y N y N+ como N.</p><p>En la Tabla 2 se muestran los valores de Accuracy obtenidos para las dos subtareas. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Tarea 2: Análisis de Polaridad de Aspectos en Twitter</head><p>Esta tarea consiste en asignar la polaridad a los aspectos que aparecen marcados en el corpus. Una de las dificultades de la tarea consiste en definir qué contexto se le asigna a cada aspecto para poder establecer su polaridad. Para un problema similar, detección de la polaridad a nivel de entidad, en la edición del TASS 2013, propusimos una segmentación de los tweets basada en un conjunto de heurísticas <ref type="bibr" target="#b42">(Pla y Hurtado, 2013)</ref>. Esta aproximación también se utilizó para la tarea de detección de la tendencia política de los usuarios de Twitter (Pla y Hurtado, 2014a) y para este caso proporcionó buenos resultados. En este trabajo se propone una aproximación más simple que consiste en determinar el contexto de cada aspecto a través de una ventana fija definida a la izquierda y derecha de la instancia del aspecto. Esta aproximación es la que se utilizó en nuestro sistema del TASS 2015 la cual utiliza ventanas de diferente longitud. La longitud de la ventana óptima se ha determinado experimentalmente sobre el conjunto de entrenamiento mediante una validación cruzada. Para entrenar nuestro sistema, se ha considerado el conjunto de entrenamiento únicamente, se han determinado los segmentos para cada aspecto y se ha seguido una aproximación similar a la Tarea 1.</p><p>El corpus de la tarea, corpus STOMPOL, se compone de un conjunto de tweets relacionados con una serie de aspectos políticos (como economía, sanidad, etc.) enmarcados en la campaña política de las elecciones andaluzas de 2015. Cada aspecto se relaciona con una o varias entidades que se corresponden con uno de los principales partidos políticos en España (PSOE, IU y Podemos). El corpus consta de 1.284 tweets y ha sido dividido en un conjunto de entrenamiento (784 tweets) y un conjunto de evaluación (500 tweets).</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Aproximación y resultados</head><p>A continuación presentamos una pequeña descripción de las características de nuestro sistema así como el proceso seguido en la fase de entrenamiento. El sistema utiliza un clasificador basado en SVM. Para aprender los modelos sólo se utiliza el conjunto de entrenamiento proporcionado para la tarea y los diccionarios de polaridad previamente descritos. Antes de abordar el entrenamiento se determinan los segmentos de tweet que constituyen el contexto de cada una de los aspectos presentes. Se ha tenido en cuenta tres tamaños de ventana de longitudes 5, 7 y 10 palabras a la izquierda y derecha del aspecto. Cada uno de los segmentos se tokeniza y se utiliza Freeling para determinar sus lemas y ciertas entidades. A continuación se aprenden diferentes modelos combinando tamaños de ventana, parámetros del modelo y diferentes características (palabras, lemas, NE, etc). Mediante validación cruzada se elige el mejor modelo. Para esta tarea sólo hemos presentado un modelo.</p><p>Run Accuracy STOMPOL run1 0.633 Tabla 3: Resultados oficiales del equipo ELiRF-UPV en la Tarea 2 de la competición TASS-2016 para el corpus STOMPOL.</p><p>En la Tabla 3 se presentan los resultados obtenidos para la Tarea 2 con lo que nuestra aproximación ha obtenido la primera posición en dicha tarea.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusiones y trabajos futuros</head><p>En este trabajo se ha presentado la participación del grupo ELiRF-UPV en las 2 tareas planteadas en TASS 2016. Nuestro equipo ha utilizado aproximaciones basadas en máquinas de soporte vectorial y se ha centrado principalmente en combinar diferentes sistemas.</p><p>Haciendo un análisis del número de participantes y de los resultados obtenidos en las dos últimas ediciones del TASS, creemos que se está cerca de alcanzar los mejores resultados posibles en la tarea de Análisis de sentimientos tal y como se ha venido planteando hasta el momento.</p><p>A la vista de los buenos resultados que se han obtenido mediante la combinación de sistemas, como trabajo futuro nos planteamos desarrollar nuevos métodos de combinación de sistemas más sofisticados así como la inclusión de otros paradigmas de clasificación más hetereogéneos (distintos de los SVM) para aumentar la complementariedad de los sistemas combinados.</p><p>Además, se pretende extender el sistema para otros idiomas. El sistema descrito ya ha sido utilizado, con ligeras modificaciones, en tareas de análisis de sentimientos para el Inglés en la competición Semeval <ref type="bibr" target="#b39">(Martínez, Pla, y Hurtado, 2016)</ref> aunque con resultados no tan satisfactorios como en las tareas del TASS.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Agradecimientos</head><p>Este trabajo ha sido parcialmente subvencionado por el MINECO mediante el proyecto ASLP-MULAN: Audio, Speech and Language Processing for Multimedia Analytics (TIN2014-54288-C4-3-R). </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Bibliografía</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">Introduction</head><p>The social media activity is being profused in the recent years, users post opinions and comments in Twitter and in other social platforms. Due to this, there is a huge amount of information available that could be useful for business, in order to design marketing campaigns or to apply any kind of business analysis.</p><p>As a consequence, the research on text mining and also on the field of Sentiment Analysis (sa) has grown considerably these days. sa is the part of Natural Language Processing (nlp) responsible for determining the polarity of a text or a whole sentence. The sa applied to Twitter has to be conducted in a restricted scenario due to the maxi-mum length of the post. However, tweets have other elements we have to consider, like hashtags, mentions and retweets. More concretely, aspect-based sentiment analysis (absa) consists of extracting opinions, i.e. determining the sentiment polarity, from specific entities in the text <ref type="bibr" target="#b58">(Liu, 2012)</ref>. Therefore, this task becomes a challenge on the field of nlp.</p><p>The tass Workshop (García-Cumbreras et al., 2016) and the sepln conference offer an opportunity for participants to know about the latest advances on the field of nlp for Spanish language.</p><p>Many approaches applied to sa can be found in the literature, where it is possible to distinguish between knowledge based approaches <ref type="bibr">(Brooke, Tofiloski, and Taboada, 2009;</ref><ref type="bibr" target="#b54">Fernández-Gavilanes et al., 2016)</ref>, using grammars and thesaurus and others based on machine learning approaches <ref type="bibr">(Mo-hammad, Kiritchenko, and Zhu, 2013)</ref>. In the last years we can also find deep learning approaches <ref type="bibr" target="#b48">(Bengio, 2009)</ref>, applied to this task.</p><p>We our supervised machine (ml) system which consists of a Support Vector Machine (svm) classifier. 
Our objective is to conduct the sa process at an aspect level, task 2, determining the polarity of a specific given part of a sentence.</p><p>The article is structured as follows. Section 2 is a review of the research involving sa in the Twitter domain. Then, the Section 3 describes the applied approach and the implemented system. In Section 4, we show the experimental results of our system. Finally, in Section 5 we present the conclusions and future works.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">Related work</head><p>A large amount of literature related to Opinion Mining (om) and sa can be found <ref type="bibr">(Pang and Lee, 2008;</ref><ref type="bibr" target="#b59">Martínez-Cámara et al., 2016)</ref>. Most of the systems are applied to Twitter. However others are applied to social media platforms within the micro-blog context. Due to this, the approaches are varied technically and in connection with the purpose.</p><p>Two main approaches exist in sa: supervised and unsupervised learning ones. Supervised systems implement classification methods like svm, Logistic Regression (lr), Conditional Random Fields (crf), K-Nearest Neighbors (knn), etc. <ref type="bibr" target="#b52">Cui, Mittal, and Datar (2006)</ref> affirmed that svm are more appropriate for sentiment classification than generative models, due to their capability for working with ambiguity, that is, dealing with mixed feelings. Supervised algorithms are used when the number of classes, as well as the representative members of each class, are known.</p><p>Unsupervised systems are based on linguistic knowledge like lexicons, and syntactic features in order to infer the polarity <ref type="bibr" target="#b62">(Paltoglou and Thelwall, 2012)</ref>. These last techniques represent a more effective approach in the cross-domain context and for multilingual applications. The unsupervised classification algorithms do not work with a training set, in contrast, some of them use clustering algorithms in order to distinguish groups <ref type="bibr" target="#b57">(Li and Liu, 2010)</ref>.</p><p>As noted earlier, the special case of ap-plying sa to Twitter has been fully addressed <ref type="bibr" target="#b61">(Pak and Paroubek, 2010;</ref><ref type="bibr" target="#b15">Han and Baldwin, 2011)</ref>. 
Within the chosen solutions, we highlight the text normalization approach <ref type="bibr" target="#b53">(Fabo, Cuadros, and Etchegoyhen, 2013)</ref> and the use of key elements in classification approach <ref type="bibr" target="#b66">(Wang et al., 2011)</ref>. Others hold the advantages of using deep learning techniques in this task <ref type="bibr">(dos Santos and Gatti, 2014)</ref>.</p><p>According to the purpose of the developed systems, it is possible to find applications like classification of product reviews and political sentiment and election results prediction <ref type="bibr" target="#b49">(Bermingham and Smeaton, 2011)</ref>, among others.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">System Overview</head><p>In this section we make a brief description of the system submitted for Task 2: Aspectbased sentiment analysis. We developed a supervised system, based on a svm classifier using different features. In the next subsections we explain the different steps required.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1">Preprocessing</head><p>Before applying any supervised approach to our corpus, some preprocessing is needed. First of all, we have to normalize the text, since in Twitter language we can find abbreviations, mentions, hashtags, URLs or misspellings. In order to do that, we replace the URLs with the "URL" tag and we replace the abbreviations or misspellings with the correct entire word. For mentions and hashtags, we keep them unchanged but deleting the "@" or "#" symbols. Moreover, when a hashtag is composed of several words, we split and treat them as different tokens.</p><p>After this, a lexical analysis is carried out. It consists of lemmatization and POS tagging, which are performed by means of Freeling tool <ref type="bibr" target="#b47">(Atserias et al., 2006)</ref>.</p><p>Once we have analysed lexically the texts, we decided to separate the sentences by the different aspects. For doing that, the scope of each aspect is determined, applying the following rules, which are adapted from our English aspect based sentiment analysis system <ref type="bibr">(Alvarez-López et al., 2016)</ref> • If there is only one aspect in the sentence, we keep the sentence unchanged, and introduce it entirely as input for the next step.</p><p>• If there are multiple aspects, we separate the sentences by punctuation marks, conjunctions or other aspects found.</p><p>• If there are several aspects with no words between them, we assume that they belong to the same context, and assign the same polarity to all of them.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2">SVM classifier</head><p>In this section we describe the strategy followed to determine the sentiment (positive, negative or neutral) for each aspect predefined in corpus.</p><p>We develop a svm classifier, using the libsvm library <ref type="bibr" target="#b51">(Chang and Lin, 2011)</ref>. The inputs for the svm will be the sentences separated by contexts, as explained in the previous subsection. The features extracted are the following:</p><p>• Word tokens of nouns, adjectives and verbs in the sentence.</p><p>• Lemmas of verbs, nouns and adjectives that appear in each sentence.</p><p>• POS tags of nouns, adjectives and verbs.</p><p>• N-grams of different length, grouping the words in each sentence.</p><p>• Aspects appearing in the sentence. We join "aspect"-"entity", defined in each target as a feature.</p><p>• Negations. We create a negation dictionary, which contains several particles indicating negation, such as "no", "nunca", etc.</p><p>The previous features are all binary ones, assigning the value 1 if the current feature is present in the tweet and the value 0, if not.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">Experimental Results</head><p>The Task 2: Sentiment Analysis at the aspect level consists of assigning a polarity label to each aspect, which were initially marked in the stompol corpus <ref type="bibr">(Martínez-Cámara et al., 2016)</ref> raised by the tass organization. In this way, this corpus provides both polarity labels and the identification of the aspects that appear in each tweet. The aim is to be able to correctly assign to each aspect a positive, negative or neutral polarity.</p><p>In this regard, the stompol corpus consists of a set of Spanish tweets related to a number of political issues, such as health or economy, among others. These issues are framed in the political campaign of Andalusian elections in 2015, where each aspect relates to one or several entities that correspond to one of the main political parties in Spain (PP, PSOE, IU, UPyD, Cs and Podemos). The corpus is composed by 1,284 tweets, and has been divided into a training set (784 tweets) and a set of evaluation (500 tweets).</p><p>In order to evaluate the performance of the various features for polarity classification at an aspect-based level, we perform a series of ablation experiments as shown in Table 1. We start with the word token baseline classifier, and then add all four sets of features that help to increase performance as measured by accuracy. As we might expect, including the aspect feature has the most marked effect on the performance of polarity classification, although all the features contributed to improving overall performance on stompol corpus. Due to the low participation of research teams in task 2 this year, we decided to compare our proposal to the systems presented this year and also to that ones of last year, because of the use of the same dataset.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Type</head><p>For this reason, Table <ref type="table" target="#tab_1">2</ref> compares results for our approach with different official ones submitted in 2015 and 2016 tass editions. In this way, we compared our results for a ml approach based on well-known squared-regularised logistic regression with a snippet of length 4 (Lys-2) described in <ref type="bibr" target="#b65">Vilares et al. (2015)</ref>, a clustering method focused on grouping authors with similar sociolinguistic insights (TID-spark) described in <ref type="bibr" target="#b64">Park (2015)</ref>, a recurrent neural network composed of a single long short term memory and a logistic function (Lys-1) described in <ref type="bibr" target="#b65">Vilares et al. (2015)</ref>, a ml approach based on a svm with a snippet of length 5,7 and 10 (ELiRF) described in <ref type="bibr">Hurtado, Plà, and Buscaldi (2015)</ref>, and the best performing run of the actual task edition (ELiRF-UPV). Comparing the results, the performance of our current model is close to the top-ranking systems of this and last year.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Task edition Accuracy</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>ELiRF</head></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5">Conclusions and future works</head><p>This paper describes the participation of the GTI group in the tass 2016, Task 2: Aspect-Based Sentiment Analysis. We developed a supervised system based on a svm classifier for the aspect-based sentiment analysis. The performance of our approach has been compared to the ones submitted this year but also to the ones submitted last year. Experimental results suggest that we need to explore new features, such as word embedding representations or paraphrase <ref type="bibr" target="#b67">(Zhao and Lan, 2015)</ref>, in order to improve the performance.</p><p>As future work we plan to include new features explained before and to develop a new system which combines different ml classification methods. We are also interested in considering different paradigms of heterogeneous classification, such as deep learning to increase the performance.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: Sample tweets (General corpus)</figDesc><graphic coords="15,71.38,382.90,212.40,268.08" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2</head><label>2</label><figDesc>Figure2shows the information of two sample tweets.</figDesc><graphic coords="15,312.58,598.66,212.40,81.36" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Sample tweets (STOMPOL corpus)</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Figure 3 :</head><label>3</label><figDesc>Figure 3: Tweet not rightly classified by any system</figDesc><graphic coords="19,93.70,416.98,168.24,128.88" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_4"><head></head><label></label><figDesc>Figures Figure3,Figure 4Figure 5 are three examples of tweets that were not rightly classified by any system. The common feature of the three tweets is that they do not have any lexical marker that express emotion or opinion. Moreover, the tweet of the Figure 4 is sarcastic, which means an additional challenging for SA because requires a deep understanding of the language.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_5"><head>Figure 4 :</head><label>4</label><figDesc>Figure 4: Tweet not rightly classified by any system</figDesc><graphic coords="19,331.78,294.58,174.00,165.12" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head></head><label></label><figDesc>Positive :-), :), :D, :o), :], D:3, :c), :&gt;, =], 8), =), :}, :ˆ), :-D, 8-D, 8D, x-D, xD, X-D, XD, =-D, =D, =-3, =3, BˆD, :'), :'), :*, :-*, :ˆ*, ;-), ;), *-), *), ;-], ;], ;D, ;ˆ), &gt;:P, :-P, :P, X-P, x-p, xp, XP, :-p, :p, =p, :-b, :b Negative &gt;:[, :-(, :(, :-c, :-&lt;, :&lt;, :-[, :[, :{, ;(, :-||, &gt;:(, :'-(, :'(, D:&lt;, D=, v.v</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_7"><head>Figura 1 :</head><label>1</label><figDesc>Figura 1: Número de tweets recuperados cada 12 horas</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0"><head></head><label></label><figDesc></figDesc><graphic coords="26,72.00,83.96,453.55,127.84" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 :</head><label>1</label><figDesc>Corpus statistics</figDesc><table><row><cell>Attribute</cell><cell>Value</cell></row><row><cell>Tweets</cell><cell>68.017</cell></row><row><cell>Tweets (test)</cell><cell>60.798 (89%)</cell></row><row><cell>Tweets (test)</cell><cell>7.219 (11%)</cell></row><row><cell>Topics</cell><cell>10</cell></row><row><cell>Users</cell><cell>154</cell></row><row><cell>Date start (train)</cell><cell>2011-12-02</cell></row><row><cell>Date end (train)</cell><cell>2012-04-10</cell></row><row><cell>Date start (test)</cell><cell>2011-12-02</cell></row><row><cell>Date end (test)</cell><cell>2012-04-10</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2 .</head><label>2</label><figDesc>Overview of<ref type="bibr" target="#b46">TASS 2016</ref> </figDesc><table><row><cell>Entity</cell><cell>Train</cell><cell>Test</cell></row><row><cell>PP</cell><cell>205</cell><cell>125</cell></row><row><cell>PSOE</cell><cell>136</cell><cell>70</cell></row><row><cell>C's</cell><cell>119</cell><cell>87</cell></row><row><cell>Podemos</cell><cell>98</cell><cell>80</cell></row><row><cell>IU</cell><cell>111</cell><cell>43</cell></row><row><cell>UPyD</cell><cell>97</cell><cell>124</cell></row><row><cell>Total</cell><cell>766</cell><cell>529</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 2 :</head><label>2</label><figDesc>Number of tweets per entity and per corpus subset</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_3"><head>Table 3 :</head><label>3</label><figDesc>Participant groups</figDesc><table><row><cell>Group</cell><cell>Report</cell></row><row><cell></cell><cell>ELiRF-UPV en TASS 2016:</cell></row><row><cell>ELiRF</cell><cell>Análisis de Sentimientos en</cell></row><row><cell></cell><cell>Twitter</cell></row><row><cell></cell><cell>GTI at TASS 2016:</cell></row><row><cell>GTI</cell><cell>Supervised Approach for Aspect Based Sentiment</cell></row><row><cell></cell><cell>Analysis in Twitter</cell></row><row><cell></cell><cell>JACERONG at TASS 2016:</cell></row><row><cell>jacerong</cell><cell>An Ensemble Classifier for Sentiment Analysis of Spanish</cell></row><row><cell></cell><cell>Tweets at Global Level</cell></row><row><cell></cell><cell>LABDA at the 2016 TASS</cell></row><row><cell>LABDA</cell><cell>challenge task: using word embedding for the sentiment</cell></row><row><cell></cell><cell>analysis task</cell></row><row><cell>SINAI</cell><cell>Participación de SINAI en TASS 2016</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_4"><head>Table 4 :</head><label>4</label><figDesc>Participant reports</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_6"><head>Table 5 :</head><label>5</label><figDesc>Results for Task 1, 5 levels</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_8"><head>Table 6 :</head><label>6</label><figDesc>Results for Task 1, 3 levels</figDesc><table><row><cell>Overview of TASS 2016</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_10"><head>Table 7 :</head><label>7</label><figDesc>Results for Task 2</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_11"><head>Number of systems Rate of tweets</head><label></label><figDesc></figDesc><table><row><cell>0</cell><cell>0.056%</cell></row><row><cell>1</cell><cell>0.065%</cell></row><row><cell>2</cell><cell>0.063%</cell></row><row><cell>3</cell><cell>0.067%</cell></row><row><cell>4</cell><cell>0.059%</cell></row><row><cell>5</cell><cell>0.061%</cell></row><row><cell>6</cell><cell>0.074%</cell></row><row><cell>7</cell><cell>0.078%</cell></row><row><cell>8</cell><cell>0.081%</cell></row><row><cell>9</cell><cell>0.112%</cell></row><row><cell>10</cell><cell>0.122%</cell></row><row><cell>11</cell><cell>0.082%</cell></row><row><cell>12</cell><cell>0.062%</cell></row><row><cell>13</cell><cell>0.011%</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_12"><head>Table 8 :</head><label>8</label><figDesc>Rate of tweets rightly classified (6 classes) by a number of systems</figDesc><table /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_13"><head>Beating Barça by 17 points in the Copa is amazing Polarity: P+ Id: 177439342497767424</head><label></label><figDesc></figDesc><table><row><cell>hahahahahaha "@Absolutexe: ¿Le</cell></row><row><cell>han cambiado ya el nombre a la</cell></row><row><cell>Junta de Andalucía por la Banda de</cell></row><row><cell>Andalucía o aún no?"</cell></row><row><cell>hahahahahaha "@Absolutexe: Has the</cell></row><row><cell>Junta de Andalucía renamed Gang of</cell></row><row><cell>Andalucía or not yet?"</cell></row><row><cell>Polarity: N+</cell></row><row><cell>Id: 177439342497767424</cell></row><row><cell>Rubalcaba pide a Rajoy que</cell></row><row><cell>presente ya los Presupuestos y dice</cell></row><row><cell>que no lo hace porque espera a las</cell></row><row><cell>elecciones andaluzas</cell></row><row><cell>Rubalcaba requires Rajoy to submit the</cell></row><row><cell>Budget and says that he didn't because</cell></row><row><cell>he is waiting the results of the elections</cell></row><row><cell>in Andalucia</cell></row><row><cell>Polarity: NONE</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_14"><head></head><label></label><figDesc>Evaluación de Modelos de Representación del Texto con Vectores de Dimensión Reducida para Análisis de Sentimiento *</figDesc><table><row><cell>Turney, P. D. 2002. Thumbs up or thumbs</cell><cell></cell><cell></cell><cell></cell></row><row><cell>down?: Semantic orientation applied to</cell><cell></cell><cell></cell><cell></cell></row><row><cell>unsupervised classification of reviews. In</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Proceedings of the 40th Annual Meeting on</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Association for Computational Linguistics,</cell><cell></cell><cell></cell><cell></cell></row><row><cell cols="5">ACL '02, pp: 417-424. Association for Evaluation of Reduced Dimension Vector Text Representation Models for</cell></row><row><cell cols="2">Computational Linguistics, Stroudsburg, Sentiment Analysis</cell><cell></cell><cell></cell></row><row><cell>PA, USA. doi:10.3115/1073083.1073153.</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Villena-Román, J., Sara, L. S., Eugenio M. C., Edgar Casasola Murillo</cell><cell cols="3">Gabriela Marín Raventós</cell></row><row><cell>and José Carlos G. C. 2013. TASS -Universidad de Costa Rica</cell><cell cols="3">Universidad de Costa Rica</cell></row><row><cell>Workshop on Sentiment Analysis at SEPLN. San José, Costa Rica</cell><cell cols="2">San José, Costa Rica</cell><cell></cell></row><row><cell>Revista de Procesamiento del Lenguaje edgar.casasola@ucr.ac.cr</cell><cell cols="3">gabriela.marin@ucr.ac.cr</cell></row><row><cell>Natural, 50, pp 37-44.</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Villena-Román, J., Janine G. M., Sara L. S. and</cell><cell></cell><cell></cell><cell></cell></row><row><cell>José Carlos G. C. 2014. 
TASS 2013 -A</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Second Step in Reputation Analysis in</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Spanish. Revista de Procesamiento del</cell><cell></cell><cell></cell><cell></cell></row><row><cell>Lenguaje Natural, 52, pp 37-44.</cell><cell></cell><cell></cell><cell></cell></row><row><cell></cell><cell cols="4">Pang, B., Lillian Lee and Shivakumar</cell></row><row><cell></cell><cell>Vaithyanathan.</cell><cell>2002.</cell><cell>Thumbs</cell><cell>up?:</cell></row><row><cell></cell><cell cols="4">Sentiment classification using machine</cell></row><row><cell></cell><cell cols="4">learning techniques. In Proceedings of the</cell></row><row><cell></cell><cell cols="4">ACL-02 Conference on Empirical Methods</cell></row><row><cell></cell><cell cols="4">in Natural Language Processing -Volume</cell></row><row><cell></cell><cell cols="4">10, EMNLP '02, páginas 79-86. Association</cell></row><row><cell></cell><cell cols="4">for Computational Linguistics, Stroudsburg,</cell></row><row><cell></cell><cell cols="4">PA, USA. doi:10.3115/1118693.1118704.</cell></row><row><cell></cell><cell cols="4">Pang, B. and Lillian Lee (2008). Opinion</cell></row><row><cell></cell><cell cols="4">mining and sentiment analysis. Foundations</cell></row><row><cell></cell><cell cols="4">and Trends in Information Retrieval, 2(1-</cell></row><row><cell></cell><cell>2):1-135.</cell><cell>ISSN</cell><cell cols="2">1554-0669.</cell></row><row><cell></cell><cell cols="2">doi:10.1561/1500000011.</cell><cell></cell></row><row><cell></cell><cell cols="4">Quirós, A., Isabel S. B. and Paloma M. 2016.</cell></row><row><cell></cell><cell cols="4">LABDA at the 2016 TASS challenge task:</cell></row><row><cell></cell><cell cols="4">using word embeddings for the sentiment</cell></row><row><cell></cell><cell cols="4">analysis task. 
In Proceedings of TASS 2016:</cell></row><row><cell></cell><cell cols="4">Workshop on Sentiment Analysis at SEPLN</cell></row><row><cell></cell><cell cols="4">co-located with the 32nd SEPLN</cell></row><row><cell></cell><cell cols="4">Conference (SEPLN 2016), Salamanca,</cell></row><row><cell></cell><cell>September</cell><cell></cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_17"><head>Table 3 :</head><label>3</label><figDesc>Results for Sentiment Analysis at global level (3 levels, Full test corpus)</figDesc><table><row><cell></cell><cell>P</cell><cell>R</cell><cell>F1</cell><cell>Acc</cell></row><row><cell cols="5">RUN-1 0.411 0.449 0.429 0.527</cell></row><row><cell cols="5">RUN-2 0.412 0.448 0.429 0.527</cell></row><row><cell cols="5">RUN-3 0.402 0.436 0.418 0.549</cell></row><row><cell cols="5">Table 2: Results for Sentiment Analysis at</cell></row><row><cell cols="5">global level (5 levels, Full test corpus)</cell></row><row><cell>Run</cell><cell>P</cell><cell>R</cell><cell>F1</cell><cell>Acc</cell></row><row><cell cols="5">RUN-1 0.506 0.510 0.508 0.652</cell></row><row><cell cols="5">RUN-2 0.508 0.508 0.508 0.652</cell></row><row><cell cols="5">RUN-3 0.512 0.511 0.511 0.653</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_18"><head>Table 1 :</head><label>1</label><figDesc>Performance on the test set in the six-labels evaluation</figDesc><table><row><cell cols="2">Experiment Accuracy</cell><cell>Macro-Precision</cell><cell>Macro-Recall</cell><cell>Macro-F1</cell></row><row><cell>run-1</cell><cell>0.614</cell><cell>0.471</cell><cell>0.531</cell><cell>0.499</cell></row><row><cell>run-2</cell><cell>0.619</cell><cell>0.476</cell><cell>0.535</cell><cell>0.504</cell></row><row><cell>run-3</cell><cell>0.620</cell><cell>0.477</cell><cell>0.532</cell><cell>0.503</cell></row><row><cell cols="2">Experiment Accuracy</cell><cell>Macro-Precision</cell><cell>Macro-Recall</cell><cell>Macro-F1</cell></row><row><cell>run-1</cell><cell>0.702</cell><cell>0.564</cell><cell>0.565</cell><cell>0.564</cell></row><row><cell>run-2</cell><cell>0.704</cell><cell>0.567</cell><cell>0.568</cell><cell>0.567</cell></row><row><cell>run-3</cell><cell>0.705</cell><cell>0.568</cell><cell>0.567</cell><cell>0.568</cell></row><row><cell cols="5">Table 2: Performance on the test set in the</cell></row><row><cell cols="3">four-labels evaluation</cell><cell></cell><cell></cell></row><row><cell cols="5">Class Precision Recall F1-score</cell></row><row><cell>P</cell><cell>0.755</cell><cell>0.786</cell><cell cols="2">0.770</cell></row><row><cell>NEU</cell><cell>0.128</cell><cell>0.093</cell><cell cols="2">0.107</cell></row><row><cell>N</cell><cell>0.631</cell><cell>0.812</cell><cell cols="2">0.710</cell></row><row><cell cols="2">NONE 0.758</cell><cell>0.578</cell><cell cols="2">0.656</cell></row><row><cell cols="5">Table 3: Discriminative power for each class</cell></row><row><cell cols="3">in the four-labels evaluation</cell><cell></cell><cell></cell></row><row><cell cols="5">evaluation, and of 0.2% in the four-labels</cell></row><row><cell cols="5">evaluation; instead, a negligible gain occurs</cell></row><row><cell cols="5">among the "run-2" and" run-3" 
experiments,</cell></row><row><cell cols="5">taking additionally into account the compu-</cell></row><row><cell cols="4">tational cost of running the latter.</cell><cell></cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_24"><head>Table 1 :</head><label>1</label><figDesc>Results for polarity feature ablation experiments on stompol corpus</figDesc><table><row><cell></cell><cell cols="2">Accuracy Improvement</cell></row><row><cell>Word token</cell><cell>56.12</cell><cell></cell></row><row><cell>+Lemmas</cell><cell>57.64</cell><cell>+1.52%</cell></row><row><cell>+pos tags</cell><cell>58.26</cell><cell>+0.62%</cell></row><row><cell>+Aspects</cell><cell>59.94</cell><cell>+1.68%</cell></row><row><cell>+Negations</cell><cell>60.60</cell><cell>+0.66%</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_25"><head>Table 2 :</head><label>2</label><figDesc>Results of different approaches in 2015/2016 tass editions on stompol corpus</figDesc><table><row><cell>-UPV</cell><cell>2016</cell><cell>63.3</cell></row><row><cell>ELiRF</cell><cell>2015</cell><cell>63.3</cell></row><row><cell>GTI</cell><cell>2016</cell><cell>60.6</cell></row><row><cell>LyS-1</cell><cell>2015</cell><cell>59.9</cell></row><row><cell>TID-spark</cell><cell>2015</cell><cell>55.7</cell></row><row><cell>Lys-2</cell><cell>2015</cell><cell>54.0</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" xml:id="foot_0">Publicado en http://ceur-ws.org/. CEUR-WS.org es una publicación en serie con ISSN reconocido ISSN 1613-0073</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_1">http://snowball.tartarus.org/algorithms/spanish/stop.txt</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_2">https://www.meaningcloud.com/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_3">Experiments showed that not-lemmatized text performed better in all settings, hence the best settings reported here is using not-lematized model</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_4">http://dumps.wikimedia.org/eswiki</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_5">http://scikit-learn.org/ Participación de SINAI en<ref type="bibr" target="#b46">TASS 2016</ref> </note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" xml:id="foot_6">Participación de SINAI en<ref type="bibr" target="#b46">TASS 2016</ref> </note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_7">http://nlp.lsi.upc.edu/freeling/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_8">http://www.csie.ntu.edu.tw/˜cjlin/libsvm/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_9">http://www.csie.ntu.edu.tw/˜cjlin/liblinear/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_10">http://scikit-learn.org/stable/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" xml:id="foot_11">Álvarez-López, M. Fernández-Gavilanes, S. García-Méndez, J. Juncal-Martínez, F. J. González-Castaño</note>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgements</head><p>This work has been partially supported by a grant from the Fondo Europeo of Desarrollo Regional (FEDER) and REDES project (TIN2015-65136-C2-1-R) from the Spanish Government.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgments</head><p>This work was supported by eGovernAbility-Access project (TIN2014-52665-C2-2-R).</p></div>
			</div>


			<div type="funding">
<div xmlns="http://www.tei-c.org/ns/1.0"><p>LABDA at the 2016 TASS challenge task: using word embeddings for the sentiment analysis task Antonio Quirós, Isabel Segura-Bedmar, Paloma Martínez .LABDA at the 2016 TASS challenge task: using word embeddings for the sentiment analysis task Antonio Quirós, Isabel Segura-Bedmar, Paloma Martínez .* This work was supported by eGovernAbility-Access project (TIN2014-52665-C2-2-R). * This work was partially supported by the Ministerio de Economía y Competitividad under project COINS (TEC2013-47016-C2-1-R) and by Xunta de Galicia (GRC2014/046).</p></div>
			</div>

			<div type="annex">
<div xmlns="http://www.tei-c.org/ns/1.0"><head>GTI at TASS 2016: Supervised Approach for Aspect Based</head><p>Sentiment Analysis in Twitter * GTI en TASS 2016: Una aproximación supervisada para el de sentimiento aspectos en Twitter Tamara Álvarez-López, Milagros Fernández-Gavilanes, Silvia García-Méndez, Jonathan Juncal-Martínez, Francisco Javier González-Castaño GTI Research Group, AtlantTIC University of Vigo, 36310 Vigo, Spain {talvarez,mfgavilanes,sgarcia,jonijm}@gti.uvigo.es, javier@det.uvigo.es</p><p>Resumen: Este artículo describe la participación del grupo de investigación GTI, del centro AtlantTIC, perteneciente a la Universidad de Vigo, en el tass 2016. Este taller es un evento enmarcado dentro de la XXXII edición del Congreso Anual de la Sociedad Española para el Procesamiento del Lenguaje Natural. En este trabajo se propone una aproximación supervisada, basada en clasificadores, para la tarea de análisis de sentimiento basado en aspectos. Mediante esta técnica hemos conseguido mejorar las prestaciones de ediciones anteriores, obteniendo una solución acorde con el estado del arte actual. Palabras clave: Análisis de sentimiento, aspectos, SVM, aprendizaje automático, Twitter Abstract: This paper describes the participation of the GTI research group of AtlantTIC, University of Vigo, in tass 2016. This workshop is framed within the XXXII edition of the Annual Congress of the Spanish Society for Natural Language Processing event. In this work we propose a supervised approach based on classifiers, for the aspect based sentiment analysis task. Using this technique we managed to improve the performance of previous years, obtaining a solution reflecting the actual state-of-the-art. Keywords: Sentiment analysis, aspects, SVM, machine learning, Twitter</p></div>			</div>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">JACERONG at TASS 2016: An Ensemble Classifier for Sentiment Analysis of Spanish Tweets at Global Level</title>
		<author>
			<persName><forename type="first">E</forename><surname>Cambria</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Amir Hussain</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">A</forename><surname>Cerón ; Guzmán</surname></persName>
		</author>
		<idno type="DOI">10.1007/978-94-007-5070-8</idno>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)</title>
				<meeting>TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)<address><addrLine>Salamanca</addrLine></address></meeting>
		<imprint>
			<publisher>Springer Netherlands</publisher>
			<date type="published" when="2012">2012. 2016. September</date>
			<biblScope unit="volume">2</biblScope>
		</imprint>
	</monogr>
	<note>Sentic Computing. Techniques, Tools and Applications</note>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Evaluación de Modelos de Representación del Texto con Vectores de Dimensión Reducida para Análisis de Sentimiento</title>
		<author>
			<persName><forename type="first">E</forename><surname>Casasola Murillo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">R</forename><surname>Gabriela</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)</title>
				<meeting>TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)<address><addrLine>Salamanca</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2016-09">2016. September</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">ELiRF-UPV en TASS 2016: Análisis de Sentimientos en Twitter</title>
		<author>
			<persName><forename type="first">Ll</forename><surname>Hurtado</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Ferran</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)</title>
				<meeting>TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)<address><addrLine>Salamanca</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2016-09">2016. September</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Participación de SINAI en TASS 2016</title>
		<author>
			<persName><forename type="first">A</forename><surname>Montejo-Ráez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">C</forename><surname>Díaz-Galiano</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)</title>
				<editor>
			<persName><forename type="first">A</forename><surname>Corazza</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Lavelli</surname></persName>
		</editor>
		<editor>
			<persName><surname>Zanoli</surname></persName>
		</editor>
		<meeting>TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)<address><addrLine>Salamanca</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2016">2016. 2016. 2016</date>
			<biblScope unit="page">71</biblScope>
		</imprint>
	</monogr>
	<note>A knowledge-poor approach to chemical-disease relation extraction</note>
</biblStruct>

<biblStruct xml:id="b4">
	<monogr>
		<title level="m" type="main">Spanish Billion Words Corpus and Embeddings</title>
		<author>
			<persName><forename type="first">C</forename><surname>Cardellino</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2016-03">2016. March</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Modelling public sentiment in twitter: using linguistic patterns to enhance supervised learning</title>
		<author>
			<persName><forename type="first">P</forename><surname>Chikersal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Poria</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Cambria</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gelbukh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">E</forename><surname>Siong</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Conference on Intelligent Text Processing and Computational Linguistics</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2015">2015</date>
			<biblScope unit="page" from="49" to="65" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Overview of tass 2016</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>García-Cumbreras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Villena-Román</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Martínez-Cámara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">C</forename><surname>Díaz-Galiano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">T</forename><surname>Martín-Valdivia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">A U</forename><surname>López</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN colocated with the 32nd SEPLN Conference (SEPLN 2016)</title>
				<meeting>TASS 2016: Workshop on Sentiment Analysis at SEPLN colocated with the 32nd SEPLN Conference (SEPLN 2016)<address><addrLine>Salamanca, Spain</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2016-09">2016. September</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Distributed representations of words and phrases and their compositionality</title>
		<author>
			<persName><forename type="first">T</forename><surname>Mikolov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Dean</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Advances in neural information processing systems</title>
				<imprint>
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Semantic orientation for polarity classification in spanish reviews</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">D</forename><surname>Molina-González</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Martínez-Cámara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>-T. Martín-Valdivia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">M</forename><surname>Perea-Ortega</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Expert Systems with Applications</title>
		<imprint>
			<biblScope unit="volume">40</biblScope>
			<biblScope unit="issue">18</biblScope>
			<biblScope unit="page" from="7250" to="7257" />
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Opinion mining and sentiment analysis</title>
		<author>
			<persName><forename type="first">B</forename><surname>Pang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Lee</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Foundations and trends in information retrieval</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="issue">1-2</biblScope>
			<biblScope unit="page" from="1" to="135" />
			<date type="published" when="2008">2008</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Exploring word embedding for drug name recognition</title>
		<author>
			<persName><forename type="first">I</forename><surname>Segura-Bedmar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Suárez-Paniagua</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Martınez</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">SIXTH INTERNATIONAL WORKS-HOP ON HEALTH TEXT MINING AND INFORMATION ANALYSIS (LOUHI)</title>
				<imprint>
			<date type="published" when="2015">2015</date>
			<biblScope unit="page">64</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Parsing with compositional vector grammars</title>
		<author>
			<persName><forename type="first">R</forename><surname>Socher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Bauer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">Y</forename><surname>Ng</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">ACL (1)</title>
				<imprint>
			<date type="published" when="2013">2013a</date>
			<biblScope unit="page" from="455" to="465" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Recursive deep models for semantic compositionality over a sentiment treebank</title>
		<author>
			<persName><forename type="first">R</forename><surname>Socher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Perelygin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">Y</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chuang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">Y</forename><surname>Ng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Potts</surname></persName>
		</author>
		<author>
			<persName><surname>Citeseer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 25th International Conference Companion on World Wide Web, WWW&apos;16 Companion</title>
				<editor>
			<persName><forename type="first">J</forename><forename type="middle">A</forename></persName>
		</editor>
		<editor>
			<persName><forename type="first">E</forename><surname>León-Guzmán</surname></persName>
		</editor>
		<meeting>the 25th International Conference Companion on World Wide Web, WWW&apos;16 Companion</meeting>
		<imprint>
			<date type="published" when="2013">2013b. 2016</date>
			<biblScope unit="volume">1631</biblScope>
			<biblScope unit="page" from="605" to="610" />
		</imprint>
	</monogr>
	<note>International World Wide Web Conferences Steering Committee</note>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Overview of tass 2016</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>García-Cumbreras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Villena-Román</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Martínez-Cámara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">C</forename><surname>Díaz-Galiano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">T</forename><surname>Martín-Valdivia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">A</forename><surname>Urena-López</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)</title>
				<meeting>TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)<address><addrLine>Salamanca, Spain, September</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">A meta-analysis of state-of-the-art electoral prediction from Twitter data</title>
		<author>
			<persName><forename type="first">D</forename><surname>Gayo-Avello</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Soc. Sci. Comput. Rev</title>
		<imprint>
			<biblScope unit="volume">31</biblScope>
			<biblScope unit="issue">6</biblScope>
			<biblScope unit="page" from="649" to="679" />
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Lexical normalisation of short text messages: Makn sens a #Twitter</title>
		<author>
			<persName><forename type="first">B</forename><surname>Han</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Baldwin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies -Volume 1, HLT&apos;11</title>
				<meeting>the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies -Volume 1, HLT&apos;11<address><addrLine>Stroudsburg, PA, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computational Linguistics</publisher>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="368" to="378" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Scoring, weighting and the vector space model</title>
		<author>
			<persName><forename type="first">C</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Raghavan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Schütze</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">An Introduction to Information Retrieval</title>
				<meeting><address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Cambridge University Press</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">A new anew: evaluation of a word list for sentiment analysis in microblogs</title>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">Å</forename><surname>Nielsen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the ESWC2011 Workshop on &apos;Making Sense of Microposts&apos;: Big things come in small packages</title>
				<meeting>the ESWC2011 Workshop on &apos;Making Sense of Microposts&apos;: Big things come in small packages</meeting>
		<imprint>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="93" to="98" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">Freeling 3.0: Towards wider multilinguality</title>
		<author>
			<persName><forename type="first">L</forename><surname>Padró</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Stanilovsky</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Language Resources and Evaluation Conference (LREC 2012)</title>
				<meeting>the Language Resources and Evaluation Conference (LREC 2012)<address><addrLine>Istanbul, Turkey</addrLine></address></meeting>
		<imprint>
			<publisher>ELRA</publisher>
			<date type="published" when="2012-05">2012. May</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<analytic>
		<title level="a" type="main">Thumbs up?: Sentiment classification using machine learning techniques</title>
		<author>
			<persName><forename type="first">B</forename><surname>Pang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Lee</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Vaithyanathan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the ACL-02 Conference on Empirical Methods in Natural Language Processing -Volume 10</title>
				<meeting>the ACL-02 Conference on Empirical Methods in Natural Language Processing -Volume 10</meeting>
		<imprint>
			<publisher>Association for Computational Linguistics</publisher>
			<date type="published" when="2002">2002</date>
			<biblScope unit="page" from="79" to="86" />
		</imprint>
	</monogr>
	<note>EMNLP &apos;02</note>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Scikit-learn: Machine learning in Python</title>
		<author>
			<persName><forename type="first">F</forename><surname>Pedregosa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Varoquaux</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Gramfort</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Michel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Thirion</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Grisel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Blondel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Prettenhofer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Weiss</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Dubourg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Vanderplas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Passos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Cournapeau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Brucher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Perrot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Duchesnay</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Machine Learning Research</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page" from="2825" to="2830" />
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<analytic>
		<title level="a" type="main">Elhuyar at tass 2013</title>
		<author>
			<persName><forename type="first">X</forename><surname>Saralegi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">I</forename><forename type="middle">S</forename><surname>Vicente</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Sentiment Analysis Workshop at SEPLN (TASS2013)</title>
				<meeting>the Sentiment Analysis Workshop at SEPLN (TASS2013)</meeting>
		<imprint>
			<date type="published" when="2013-09">2013. September</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b22">
	<analytic>
		<title level="a" type="main">On the usefulness of lexical and syntactic processing in polarity classification of twitter messages</title>
		<author>
			<persName><forename type="first">D</forename><surname>Vilares</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Alonso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Gómez-Rodríguez</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of the Association for Information Science and Technology</title>
		<imprint>
			<date type="published" when="2014">2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b23">
	<analytic>
		<title level="a" type="main">Learning deep architectures for ai</title>
		<author>
			<persName><forename type="first">Bibliografía</forename><surname>Bengio</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yoshua</forename></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Foundations and trends in Machine Learning</title>
				<imprint>
			<date type="published" when="2009">2009</date>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="page" from="1" to="127" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b24">
	<analytic>
		<title level="a" type="main">Cross-linguistic sentiment analysis: From english to spanish</title>
		<author>
			<persName><forename type="first">Julian</forename><surname>Brooke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Milan</forename><surname>Tofiloski</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maite</forename><surname>Taboada</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">RANLP 2009 Organising Committee / ACL</title>
				<editor>
			<persName><forename type="first">En</forename><surname>Galia</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Angelova</forename></persName>
		</editor>
		<editor>
			<persName><forename type="first">Kalina</forename><surname>Bontcheva</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Ruslan</forename><surname>Mitkov</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Nicolas</forename><surname>Nicolov</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Nikolai</forename><surname>Nikolov</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2009">2009</date>
			<biblScope unit="page" from="50" to="54" />
		</imprint>
	</monogr>
	<note>editores</note>
</biblStruct>

<biblStruct xml:id="b25">
	<analytic>
		<title level="a" type="main">A unified architecture for natural language processing: Deep neural networks with multitask learning</title>
		<author>
			<persName><forename type="first">Ronan</forename><surname>Collobert</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jason</forename><surname>Weston</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 25th International Conference on Machine Learning, ICML &apos;08</title>
				<meeting>the 25th International Conference on Machine Learning, ICML &apos;08<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>ACM</publisher>
			<date type="published" when="2008">2008</date>
			<biblScope unit="page" from="160" to="167" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b26">
	<analytic>
		<title level="a" type="main">Participación de SINAI DW2Vec en TASS 2015</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">C</forename><surname>Díaz-Galiano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Montejo-Ráez</surname></persName>
		</author>
		<ptr target=".org" />
	</analytic>
	<monogr>
		<title level="m">Proc. of TASS 2015: Workshop on Sentiment Analysis at SEPLN</title>
				<meeting>of TASS 2015: Workshop on Sentiment Analysis at SEPLN</meeting>
		<imprint>
			<publisher>CEUR-WS</publisher>
			<date type="published" when="2015">2015. 1397</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b27">
	<analytic>
		<title level="a" type="main">Sentiment analysis of spanish tweets using a ranking algorithm and skipgrams</title>
		<author>
			<persName><forename type="first">Javi</forename><surname>Fernández</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Yoan</forename><surname>Gutiérrez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>José</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Patricio</forename><surname>Gómez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Andrés</forename><surname>Martínez-Barco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Rafael</forename><surname>Montoyo</surname></persName>
		</author>
		<author>
			<persName><surname>Muñoz</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the TASS workshop at SEPLN</title>
				<meeting>of the TASS workshop at SEPLN</meeting>
		<imprint>
			<date type="published" when="2013">2013. 2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b28">
	<analytic>
		<title level="a" type="main">Overview of tass 2016</title>
		<author>
			<persName><forename type="first">Miguel</forename><surname>García-Cumbreras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Julio</forename><surname>Ángel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Eugenio</forename><surname>Villena-Román</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Manuel</forename><surname>Martínez-Cámara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Carlos Díaz-Galiano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Teresa Martín-Valdivia</surname></persName>
		</author>
		<author>
			<persName><surname>Alfonso Ureña-López</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">En Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SEPLN 2016)</title>
				<meeting><address><addrLine>Salamanca, Spain</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2016-09">2016. September</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b29">
	<analytic>
		<title level="a" type="main">Reducing the dimensionality of data with neural networks</title>
		<author>
			<persName><forename type="first">Geoffrey</forename><forename type="middle">E</forename><surname>Hinton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Ruslan</surname></persName>
		</author>
		<author>
			<persName><surname>Salakhutdinov</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Science</title>
		<imprint>
			<biblScope unit="volume">313</biblScope>
			<biblScope unit="issue">5786</biblScope>
			<biblScope unit="page" from="504" to="507" />
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b30">
	<analytic>
		<title level="a" type="main">Elirfupv en tass 2014: Análisis de sentimientos, detección de tópicos y análisis de sentimientos de aspectos en twitter</title>
		<author>
			<persName><forename type="first">Lluís</forename><forename type="middle">F</forename><surname>Hurtado</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ferran</forename><surname>Pla</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the TASS workshop at SEPLN</title>
				<meeting>of the TASS workshop at SEPLN</meeting>
		<imprint>
			<date type="published" when="2014">2014. 2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b31">
	<analytic>
		<title level="a" type="main">Elirf-upv en tass 2015: Análisis de sentimientos en twitter</title>
		<author>
			<persName><forename type="first">Lluís-F</forename><surname>Hurtado</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ferran</forename><surname>Pla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Davide</forename><surname>Buscaldi</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of TASS 2015: Workshop on Sentiment Analysis at SEPLN. CEUR-WS.org</title>
				<meeting>of TASS 2015: Workshop on Sentiment Analysis at SEPLN. CEUR-WS.org</meeting>
		<imprint>
			<date type="published" when="2015">2015. 1397</date>
			<biblScope unit="page" from="35" to="40" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b32">
	<monogr>
		<title level="m" type="main">Efficient estimation of word representations in vector space</title>
		<author>
			<persName><forename type="first">Tomas</forename><surname>Mikolov</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Kai</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Greg</forename><surname>Corrado</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jeffrey</forename><surname>Dean</surname></persName>
		</author>
		<idno>CoRR, abs/1301.3781</idno>
		<imprint>
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b33">
	<analytic>
		<title level="a" type="main">Participación de SINAI Word2Vec en TASS 2014</title>
		<author>
			<persName><forename type="first">A</forename><surname>Montejo-Ráez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>García-Cumbreras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">C</forename><surname>Díaz-Galiano</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the TASS workshop at SEPLN</title>
				<meeting>of the TASS workshop at SEPLN</meeting>
		<imprint>
			<date type="published" when="2014">2014. 2014</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b34">
	<analytic>
		<title level="a" type="main">Scikit-learn: Machine learning in python</title>
		<author>
			<persName><forename type="first">Fabian</forename><surname>Pedregosa</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Gaël</forename><surname>Varoquaux</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alexandre</forename><surname>Gramfort</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Vincent</forename><surname>Michel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Bertrand</forename><surname>Thirion</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Olivier</forename><surname>Grisel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Mathieu</forename><surname>Blondel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Peter</forename><surname>Prettenhofer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ron</forename><surname>Weiss</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Vincent</forename><surname>Dubourg</surname></persName>
		</author>
		<author>
			<persName><surname>Others</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">The Journal of Machine Learning Research</title>
		<imprint>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page" from="2825" to="2830" />
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b35">
	<monogr>
		<title level="m" type="main">Tass: Detecting sentiments in spanish tweets</title>
		<author>
			<persName><forename type="first">Saralegi</forename><surname>Urizar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Xabier</forename></persName>
		</author>
		<author>
			<persName><forename type="first">Iñaki</forename><surname>San</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Vicente</forename><surname>Roncal</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
	<note>En TASS 2012 Working Notes</note>
</biblStruct>

<biblStruct xml:id="b36">
	<analytic>
		<title level="a" type="main">Semi-supervised recursive autoencoders for predicting sentiment distributions</title>
		<author>
			<persName><forename type="first">Richard</forename><surname>Socher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jeffrey</forename><surname>Pennington</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Eric</forename><forename type="middle">H</forename><surname>Huang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Andrew</forename><forename type="middle">Y</forename><surname>Ng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Christopher</forename><forename type="middle">D</forename><surname>Manning</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">En Proceedings of the Conference on Empirical Methods in Natural Language Processing, EMNLP &apos;11</title>
		<title level="s">Association for Computational Linguistics</title>
		<meeting><address><addrLine>Stroudsburg, PA, USA</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="151" to="161" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b37">
	<monogr>
		<title level="m">Future information technology</title>
				<imprint>
			<publisher>Springer</publisher>
			<biblScope unit="page" from="34" to="43" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b38">
	<analytic>
		<title level="a" type="main">Análisis de sentimientos, detección de tópicos y análisis de sentimientos de aspectos en twitter</title>
		<author>
			<persName><forename type="first">Lluís</forename><forename type="middle">F</forename><surname>Hurtado</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ferran</forename><surname>Pla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Davide</forename><surname>Buscaldi</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Elirfupv en tass</title>
				<imprint>
			<date type="published" when="2014">2015. 2014. 2014. TASS2014</date>
		</imprint>
	</monogr>
	<note>Elirf-upv en tass 2015: Análisis de en twitter</note>
</biblStruct>

<biblStruct xml:id="b39">
	<analytic>
		<title level="a" type="main">twitter using a support vector machine approach</title>
		<author>
			<persName><surname>Martínez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Ferran</forename><surname>Víctor</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Lluís-F</forename><surname>Pla</surname></persName>
		</author>
		<author>
			<persName><surname>Hurtado</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Dsic-elirf at semeval-2016 task 4: Message polarity classification in</title>
				<imprint>
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b40">
	<analytic>
		<title level="a" type="main">Bilingual Experiments on an Opinion Comparable Corpus</title>
		<author>
			<persName><forename type="first">E</forename><surname>Martínez-Cámara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">T</forename><surname>Martín-Valdivia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">D</forename><surname>Molina-González</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">A</forename><surname>Ureña-López</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</title>
				<meeting>the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</meeting>
		<imprint>
			<date type="published" when="2013">2013</date>
			<biblScope unit="page" from="87" to="93" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b41">
	<analytic>
		<title level="a" type="main">Freeling 3.0: Towards wider multilinguality</title>
		<author>
			<persName><forename type="first">Lluís</forename><surname>Padró</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Evgeny</forename><surname>Stanilovsky ; Thirion</surname></persName>
		</author>
		<author>
			<persName><forename type="first">O</forename><surname>Grisel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Blondel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Prettenhofer</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Weiss</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Dubourg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Vanderplas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Passos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Cournapeau</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Brucher</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Perrot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Duchesnay</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">En Proceedings of the Language Resources and Evaluation Conference (LREC 2012)</title>
				<meeting><address><addrLine>Istanbul</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2011">2012. 2011</date>
			<biblScope unit="volume">12</biblScope>
			<biblScope unit="page" from="2825" to="2830" />
		</imprint>
	</monogr>
	<note>Scikit-learn: Machine learning in Python</note>
</biblStruct>

<biblStruct xml:id="b42">
	<analytic>
		<title level="a" type="main">Tass-2013: Análisis de sentimientos en twitter</title>
		<author>
			<persName><forename type="first">Ferran</forename><surname>Pla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">-F</forename><surname>Lluís</surname></persName>
		</author>
		<author>
			<persName><surname>Hurtado</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">En Proceedings of the TASS workshop at SEPLN 2013. IV Congreso Español de Informática</title>
				<imprint>
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b43">
	<analytic>
		<title level="a" type="main">Political tendency identification in twitter using sentiment analysis techniques</title>
		<author>
			<persName><forename type="first">Ferran</forename><surname>Pla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">-</forename><forename type="middle">F</forename><surname>Lluís</surname></persName>
		</author>
		<author>
			<persName><surname>Hurtado</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">En Proceedings of COLING 2014, the 25th International Conference on Computational Linguistics: Technical Papers, páginas</title>
				<meeting><address><addrLine>Dublin, Ireland</addrLine></address></meeting>
		<imprint>
			<publisher>City University and Association for Computational Linguistics</publisher>
			<date type="published" when="2014-08">2014a. August</date>
			<biblScope unit="page" from="183" to="192" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b44">
	<analytic>
		<author>
			<persName><forename type="first">Ferran</forename><surname>Pla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">-</forename><forename type="middle">F</forename><surname>Lluís</surname></persName>
		</author>
		<author>
			<persName><surname>Hurtado</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">editores, Natural Language Processing and Information Systems</title>
		<title level="s">de Lecture Notes in Computer Science</title>
		<editor>
			<persName><forename type="first">Elisabeth</forename><surname>Métais</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Mathieu</forename><surname>Roche</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">Maguelonne</forename><surname>Teisseire</surname></persName>
		</editor>
		<imprint>
			<publisher>Springer International Publishing</publisher>
			<date type="published" when="2014">2014b</date>
			<biblScope unit="volume">8455</biblScope>
			<biblScope unit="page" from="208" to="213" />
		</imprint>
	</monogr>
	<note>Sentiment analysis in twitter for spanish</note>
</biblStruct>

<biblStruct xml:id="b45">
	<analytic>
		<title level="a" type="main">Elhuyar at tass 2013</title>
		<author>
			<persName><forename type="first">Xabier</forename><surname>Saralegi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">San</forename><surname>Iñaki</surname></persName>
		</author>
		<author>
			<persName><surname>Vicente</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the TASS workshop at SEPLN 2013</title>
		<title level="s">IV Congreso Español de Informática</title>
		<imprint>
			<date type="published" when="2013">2013</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b46">
	<analytic>
		<title level="a" type="main">Gti at semeval-2016 task 5: Svm and crf for aspect detection and unsupervised aspect-based sentiment analysis</title>
		<author>
			<persName><forename type="first">Elirf-Upv En Tass ;</forename></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Juncal-Martínez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Fernández-Gavilanes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Costa-Montenegro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">J</forename><surname>González-Castaño</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of SemEval</title>
				<meeting>SemEval</meeting>
		<imprint>
			<date type="published" when="2016">2016. 2016</date>
			<biblScope unit="page" from="306" to="311" />
		</imprint>
	</monogr>
	<note>Análisis de Sentimientos en Twitter References Alvarez-López</note>
</biblStruct>

<biblStruct xml:id="b47">
	<analytic>
		<title level="a" type="main">Freeling 1.3: Syntactic and semantic services in an open-source NLP library</title>
		<author>
			<persName><forename type="first">J</forename><surname>Atserias</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Casas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Comelles</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>González</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Padró</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Padró</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of LREC</title>
				<meeting>LREC</meeting>
		<imprint>
			<date type="published" when="2006">2006</date>
			<biblScope unit="volume">6</biblScope>
			<biblScope unit="page" from="48" to="55" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b48">
	<analytic>
		<title level="a" type="main">Learning deep architectures for AI</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Bengio</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Found. Trends Mach. Learn</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="issue">1</biblScope>
			<biblScope unit="page" from="1" to="127" />
			<date type="published" when="2009-01">2009. January</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b49">
	<monogr>
		<title level="m" type="main">On using Twitter to monitor political sentiment and predict election results</title>
		<author>
			<persName><forename type="first">A</forename><surname>Bermingham</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">F</forename><surname>Smeaton</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b50">
	<analytic>
		<title level="a" type="main">Cross-linguistic sentiment analysis: From english to spanish</title>
		<author>
			<persName><forename type="first">J</forename><surname>Brooke</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Tofiloski</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Taboada</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">RANLP 2009 Organising Committee / ACL</title>
				<editor>
			<persName><forename type="first">G</forename><surname>Angelova</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">K</forename><surname>Bontcheva</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">R</forename><surname>Mitkov</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Nicolov</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">N</forename><surname>Nikolov</surname></persName>
		</editor>
		<imprint>
			<date type="published" when="2009">2009</date>
			<biblScope unit="page" from="50" to="54" />
		</imprint>
	</monogr>
	<note>RANLP</note>
</biblStruct>

<biblStruct xml:id="b51">
	<analytic>
		<title level="a" type="main">Libsvm: a library for support vector machines</title>
		<author>
			<persName><forename type="first">C.-C</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C.-J</forename><surname>Lin</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM Transactions on Intelligent Systems and Technology (TIST)</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="issue">3</biblScope>
			<biblScope unit="page">27</biblScope>
			<date type="published" when="2011">2011</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b52">
	<analytic>
		<title level="a" type="main">Comparative experiments on sentiment classification for online product reviews</title>
		<author>
			<persName><forename type="first">H</forename><surname>Cui</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Mittal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Datar</surname></persName>
		</author>
		<author>
			<persName><surname>Santos</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 21st National Conference on Artificial Intelligence -Volume 2, AAAI&apos;06</title>
				<editor>
			<persName><forename type="first">M</forename><surname>Gatti</surname></persName>
		</editor>
		<meeting>the 21st National Conference on Artificial Intelligence -Volume 2, AAAI&apos;06</meeting>
		<imprint>
			<publisher>AAAI Press</publisher>
			<date type="published" when="2006">2006. 2014</date>
			<biblScope unit="page" from="69" to="78" />
		</imprint>
	</monogr>
	<note>COLING</note>
</biblStruct>

<biblStruct xml:id="b53">
	<analytic>
		<title level="a" type="main">Lexical normalization of spanish tweets with preprocessing rules, domain-specific edit distances, and language models</title>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">R</forename><surname>Fabo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Cuadros</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Etchegoyhen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Tweet Normalization Workshop co-located with 29th Conference of the Spanish Society for Natural Language Processing (SEPLN 2013)</title>
				<meeting>the Tweet Normalization Workshop co-located with 29th Conference of the Spanish Society for Natural Language Processing (SEPLN 2013)<address><addrLine>Madrid, Spain</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2013-09-20">2013. September 20th, 2013</date>
			<biblScope unit="page" from="59" to="63" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b54">
	<analytic>
		<title level="a" type="main">Unsupervised method for sentiment analysis in online texts</title>
		<author>
			<persName><forename type="first">M</forename><surname>Fernández-Gavilanes</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T</forename><surname>Álvarez-López</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Juncal-Martínez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Costa-Montenegro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><forename type="middle">J</forename><surname>González-Castaño</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Expert Systems with Applications</title>
		<imprint>
			<biblScope unit="volume">58</biblScope>
			<biblScope unit="page" from="57" to="75" />
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b55">
	<analytic>
		<title level="a" type="main">Lexical normalisation of short text messages: Makn sens a #twitter</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>García-Cumbreras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Villena-Román</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>Martínez-Cámara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">C</forename><surname>Díaz-Galiano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">T</forename><surname>Martín-Valdivia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">A</forename><surname>Ureña-López ; Han</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies -Volume 1, HLT &apos;11</title>
				<meeting>the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies -Volume 1, HLT &apos;11<address><addrLine>Salamanca, Spain; B. and T. Baldwin; Stroudsburg, PA, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computational Linguistics</publisher>
			<date type="published" when="2011">2016. 2011</date>
			<biblScope unit="page" from="368" to="378" />
		</imprint>
	</monogr>
	<note>Proceedings of TASS 2016: Workshop on Sentiment Analysis at SEPLN co-located with the 32nd SEPLN Conference (SE-PLN 2016)</note>
</biblStruct>

<biblStruct xml:id="b56">
	<analytic>
		<title level="a" type="main">ELiRF-UPV en TASS 2015: Análisis de sentimientos en Twitter</title>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">F</forename><surname>Hurtado</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Pla</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Bus</surname></persName>
		</author>
		<author>
			<persName><forename type="first">-</forename></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2015: Workshop on Sentiment Analysis at SEPLN colocated with 31st SEPLN Conference (SE-PLN 2015)</title>
				<meeting>TASS 2015: Workshop on Sentiment Analysis at SEPLN colocated with 31st SEPLN Conference (SE-PLN 2015)<address><addrLine>Alicante, Spain</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2015-09-15">2015. September 15, 2015</date>
			<biblScope unit="page" from="75" to="79" />
		</imprint>
	</monogr>
	<note>caldi</note>
</biblStruct>

<biblStruct xml:id="b57">
	<analytic>
		<title level="a" type="main">A clustering-based approach on sentiment analysis</title>
		<author>
			<persName><forename type="first">G</forename><surname>Li</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Liu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Intelligent Systems and Knowledge Engineering (ISKE), 2010 International Conference on</title>
				<imprint>
			<publisher>IEEE</publisher>
			<date type="published" when="2010">2010</date>
			<biblScope unit="page" from="331" to="337" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b58">
	<analytic>
		<title level="a" type="main">Sentiment Analysis and Opinion Mining</title>
		<author>
			<persName><forename type="first">B</forename><surname>Liu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Synthesis Lectures on Human Language Technologies</title>
				<imprint>
			<publisher>Morgan &amp; Claypool Publishers</publisher>
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b59">
	<analytic>
		<title level="a" type="main">Tass 2015 -the evolution of the spanish opinion mining systems</title>
		<author>
			<persName><forename type="first">E</forename><surname>Martínez-Cámara</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>García-Cumbreras</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Villena-Román</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>García-Morera</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Procesamiento del Lenguaje Natural</title>
		<imprint>
			<biblScope unit="volume">56</biblScope>
			<biblScope unit="page" from="33" to="40" />
			<date type="published" when="2016">2016</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b60">
	<analytic>
		<title level="a" type="main">Nrc-canada: Building the state-of-the-art in sentiment analysis of tweets</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">M</forename><surname>Mohammad</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Kiritchenko</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Zhu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the seventh international workshop on Semantic Evaluation Exercises (SemEval-2013)</title>
				<meeting>the seventh international workshop on Semantic Evaluation Exercises (SemEval-2013)<address><addrLine>Atlanta, Georgia, USA</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2013-06">2013. June</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b61">
	<analytic>
		<title level="a" type="main">Twitter as a corpus for sentiment analysis and opinion mining</title>
		<author>
			<persName><forename type="first">A</forename><surname>Pak</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Paroubek</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC&apos;10)</title>
				<editor>
			<persName><forename type="first">N</forename><forename type="middle">C C</forename><surname>Chair</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">)</forename></persName>
		</editor>
		<editor>
			<persName><forename type="first">K</forename><surname>Choukri</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">B</forename><surname>Maegaard</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Mariani</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">J</forename><surname>Odijk</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">S</forename><surname>Piperidis</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">M</forename><surname>Rosner</surname></persName>
		</editor>
		<editor>
			<persName><forename type="first">D</forename><surname>Tapias</surname></persName>
		</editor>
		<meeting>the Seventh International Conference on Language Resources and Evaluation (LREC&apos;10)<address><addrLine>Valletta, Malta</addrLine></address></meeting>
		<imprint>
			<publisher>ELRA</publisher>
			<date type="published" when="2010">2010</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b62">
	<analytic>
		<title level="a" type="main">Twitter, myspace, digg: Unsupervised sentiment analysis in social media</title>
		<author>
			<persName><forename type="first">G</forename><surname>Paltoglou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Thelwall</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM Transactions on Intelligent Systems and Technology (TIST)</title>
		<imprint>
			<biblScope unit="volume">3</biblScope>
			<biblScope unit="issue">4</biblScope>
			<biblScope unit="page">66</biblScope>
			<date type="published" when="2012">2012</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b63">
	<analytic>
		<title level="a" type="main">Opinion mining and sentiment analysis</title>
		<author>
			<persName><forename type="first">B</forename><surname>Pang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Lee</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Found. Trends Inf. Retr</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="issue">1-2</biblScope>
			<biblScope unit="page" from="1" to="135" />
			<date type="published" when="2008-01">2008. January</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b64">
	<analytic>
		<title level="a" type="main">Sentiment classification using sociolinguistic clusters</title>
		<author>
			<persName><forename type="first">S</forename><surname>Park</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2015: Workshop on Sentiment Analysis at SEPLN co-located with 31st SEPLN Conference (SEPLN 2015)</title>
				<meeting>TASS 2015: Workshop on Sentiment Analysis at SEPLN co-located with 31st SEPLN Conference (SEPLN 2015)<address><addrLine>Alicante, Spain</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2015-09-15">2015. September 15, 2015</date>
			<biblScope unit="page" from="99" to="104" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b65">
	<analytic>
		<title level="a" type="main">Lys at TASS 2015: Deep learning experiments for sentiment analysis on spanish tweets</title>
		<author>
			<persName><forename type="first">D</forename><surname>Vilares</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Doval</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Alonso</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Gómez-Rodríguez</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of TASS 2015: Workshop on Sentiment Analysis at SEPLN colocated with 31st SEPLN Conference (SE-PLN 2015)</title>
				<meeting>TASS 2015: Workshop on Sentiment Analysis at SEPLN colocated with 31st SEPLN Conference (SE-PLN 2015)<address><addrLine>Alicante, Spain</addrLine></address></meeting>
		<imprint>
			<date type="published" when="2015-09-15">2015. September 15, 2015</date>
			<biblScope unit="page" from="47" to="52" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b66">
	<analytic>
		<title level="a" type="main">Topic sentiment analysis in Twitter: A graph-based hashtag sentiment classification approach</title>
		<author>
			<persName><forename type="first">X</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Wei</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Liu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zhou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Zhang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 20th ACM International Conference on Information and Knowledge Management, CIKM &apos;11</title>
				<meeting>the 20th ACM International Conference on Information and Knowledge Management, CIKM &apos;11<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>ACM</publisher>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="1031" to="1040" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b67">
	<analytic>
		<title level="a" type="main">Ecnu: Leveraging word embeddings to boost performance for paraphrase in Twitter</title>
		<author>
			<persName><forename type="first">J</forename><surname>Zhao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Lan</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 9th International Workshop on Semantic Evaluation (SemEval 2015)</title>
				<meeting>the 9th International Workshop on Semantic Evaluation (SemEval 2015)<address><addrLine>Denver, Colorado</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computational Linguistics</publisher>
			<date type="published" when="2015-06">2015. June</date>
			<biblScope unit="page" from="34" to="39" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
