<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">An Audiovisual Corpus of Guided Tours in Cultural Sites: Data Collection protocols in the CHROME Project</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Antonio</forename><surname>Origlia</surname></persName>
							<email>antonio.origlia@unina.it</email>
						</author>
						<author>
							<persName><forename type="first">Renata</forename><surname>Savy</surname></persName>
							<email>rsavy@unisa.it</email>
						</author>
						<author>
							<persName><forename type="first">Isabella</forename><surname>Poggi</surname></persName>
							<email>isabella.poggi@uniroma3.it</email>
						</author>
						<author>
							<persName><forename type="first">Francesco</forename><surname>Cutugno</surname></persName>
							<email>cutugno@unina.it</email>
						</author>
						<author>
							<persName><forename type="first">Iolanda</forename><surname>Alfano</surname></persName>
							<email>ialfano@unisa.it</email>
						</author>
						<author>
							<persName><forename type="first">Francesca</forename><surname>D&apos;Errico</surname></persName>
							<email>francesca.derrico@uniroma3.it</email>
						</author>
						<author>
							<persName><forename type="first">Laura</forename><surname>Vincze</surname></persName>
							<email>laura.vincze@gmail.com</email>
						</author>
						<author>
							<persName><forename type="first">Violetta</forename><surname>Cataldo</surname></persName>
							<email>violetta.cataldo@live.it</email>
						</author>
						<author>
							<affiliation key="aff0">
								<orgName type="department">URBAN/ECO Research Center</orgName>
								<orgName type="institution">University of Naples</orgName>
								<address>
									<addrLine>&quot;Federico II&quot; Naples</addrLine>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff1">
								<orgName type="department">Department of Humanities Studies</orgName>
								<orgName type="institution">University of Salerno Salerno</orgName>
								<address>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff2">
								<orgName type="department">Department of Philosophy</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff3">
								<orgName type="department">Communication and Performing Arts</orgName>
								<orgName type="institution">Roma Tre University Rome</orgName>
								<address>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff4">
								<orgName type="department">Department of Electrical Engineering and Information Technology</orgName>
								<orgName type="institution">University of Naples &quot;Federico II&quot; Naples</orgName>
								<address>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff5">
								<orgName type="department">Department of Humanities Studies</orgName>
								<orgName type="institution">University of Salerno Salerno</orgName>
								<address>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff6">
								<orgName type="department">Department of Philosophy</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff7">
								<orgName type="department">Communication and Performing Arts</orgName>
								<orgName type="institution">Roma Tre University Rome</orgName>
								<address>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff8">
								<orgName type="department">Department of Philosophy</orgName>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff9">
								<orgName type="department">Communication and Performing Arts</orgName>
								<orgName type="institution">Roma Tre University Rome</orgName>
								<address>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<affiliation key="aff10">
								<orgName type="department">Department of Humanities Studies</orgName>
								<orgName type="institution">University of Salerno Salerno</orgName>
								<address>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">An Audiovisual Corpus of Guided Tours in Cultural Sites: Data Collection protocols in the CHROME Project</title>
					</analytic>
					<monogr>
						<imprint>
							<date/>
						</imprint>
					</monogr>
					<idno type="MD5">D11B5B40F6B91BFA0E64C37DB344872E</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2023-03-23T22:28+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Corpus collection</term>
					<term>guided tours</term>
					<term>social signal processing</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Creating interfaces for cultural heritage access is considered a fundamental research field because of the many beneficial effects it has on society. In this era of significant advances towards natural interaction with machines and deeper understanding of social communication nuances, it is important to investigate the communicative strategies human experts adopt when delivering contents to the visitors of cultural sites, as this allows the creation of a strong theoretical background for the development of efficient conversational agents. In this work, we present the data collection and annotation protocols adopted for the ongoing creation of the reference material to be used in the Cultural Heritage Resources Orienting Multimodal Experiences (CHROME) project to accomplish that goal.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>CCS CONCEPTS</head><p>• Human-centered computing → User studies; HCI theory, concepts and models; User models;</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1">INTRODUCTION</head><p>Developing Social Signal Processing <ref type="bibr" target="#b14">[15]</ref> techniques for advanced, natural interfaces requires a significant analysis effort on multiple aspects of communication between individuals engaging in social activity. Collecting meaningful corpora to document the multimodal signals people exchange during these activities has been the subject of a large amount of research. Among others, available corpora document meetings <ref type="bibr" target="#b5">[6]</ref>, intercultural dynamics of first acquaintance <ref type="bibr" target="#b0">[1]</ref>, phone calls between non-acquainted subjects <ref type="bibr" target="#b9">[10]</ref>, and two-person dialogues <ref type="bibr" target="#b13">[14]</ref>. The Italian national project CHROME aims at developing a data collection and annotation procedure to support the development of new interactive technologies for cultural heritage. The project concentrates on the three Campanian Charterhouses: an integrated description of these from different points of view (textual, behavioural, geometrical, etc.) is being developed.</p><p>In this paper, we present the data collection and annotation protocols adopted in the CHROME project to obtain reference material of expert gatekeepers, intended as holders of knowledge for others to refer to, accompanying visitors of cultural sites. This data will be used to investigate the social communication strategies adopted by the considered experts to deliver information to different groups of visitors. By comparing different experts (inter-subject comparisons) and different groups accompanied by the same expert (intra-subject comparison) a Gatekeeper Computational Model will be obtained and, on the basis of this model, a socially aware conversational agent, in the form of a 3D avatar, will be developed. 
This is expected to improve the capabilities of an interactive agent to involve people in engaging presentations of cultural heritage. These will make use of the 3D reconstructions of the three Campanian Charterhouses, also collected in the framework of the CHROME project. Upon completion of the project, the dataset will be made freely available for the scientific community.</p><p>In the next sections, we will present the data collection protocol, highlighting the chosen recording positions in the site of interest and the recording setup. We will, then, present the multimodal annotation protocol, designed to provide a formal description of how the guide makes use of social signals exchange to adapt the presentation and to effectively support the verbal transfer of cultural contents. Next, we will describe the informative, syntactic and prosodic annotations documenting the linguistic behaviour that characterises the domain expert. The transcribed recordings, together with the produced annotations, will be compared with a corpus of textual resources describing the objects of interest. This will support the development of a synthetic voice model for 3D avatars designed to extract cultural contents from textual databases and deliver them using social communication strategies. To improve the quality of the model, the linguistic analysis will also include a detailed annotation of disfluency phenomena, which are important to produce a natural sounding voice.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2">DATA COLLECTION</head><p>The data collection plan foresees a campaign of audiovisual recordings involving four art historians with strong experience in accompanying groups of visitors. Given the limited number of gatekeepers considered in the CHROME project, only female experts were recruited to remove gender effects in multimodal and linguistic analysis. Future extensions of the corpus will include male experts as well.</p><p>Recorded data include two Full-HD video recordings: the first one is a fixed shot of the gatekeeper, taken from a position immediately next to the attending group while the second one is a fixed shot of the visitors. A close range digital microphone with background noise cancellation is used to record the gatekeeper's voice. Immediately after the visit, the recruited visitors compile a questionnaire composed of 23 items including both Likert scale evaluations and open answer questions. The items are designed to collect anagraphic data, a self-evaluation of artistic competence, an evaluation of personal satisfaction after the visit and an evaluation of the gatekeeper's performance. These data will be used to weight objective measures of social behaviour.</p><p>Each recruited expert accompanies four groups of four people in an hour long guided tour at the San Martino Charterhouse in Naples. Recruited members of the audience vary on a socio-demographic basis and each group is gender balanced. The visit is divided into six points of interest (POIs), selected as the most relevant parts of the Charterhouse from an architectural and artistic point of view:</p><p>• Pronaos: outside the doorstep of the church. The introductory part of the visit is recorded in this POI. Environmental elements mainly consist of architectural details; • Great cloister: a large external place, near the monks' cemetery. Further details about the monks' life are given. 
Environmental elements consist of the natural setting of a large garden and of the cemetery elements (e.g. memento mori); • Parlor: the first internal setting. Specific details about the Carthusians' rules are given here. Environmental elements mainly consist of frescoes; • Chapter hall: next to the parlor. Specific details about the Carthusians' order are given here. Environmental elements mainly consist of frescoes; • Wooden choir: inside the church, behind the altar. The history of the church decoration process is given here. Environmental elements consist of both architectural details (e.g. the choir and the harmonic chassis) and artistic elements (frescoes and statues); • Treasure hall: deeper inside the complex. Details about the relationship between the monks and the different governing parties in Naples are given. Environmental elements mainly consist of architectural details. The selected POIs allow us to capture the social behaviour visitors and gatekeepers exhibit to negotiate the approach to the visit and to document postural and gestural behaviour of an art historian presenting a complex environment.</p><p>Videos and audio recordings are synchronised a posteriori using a visual-acoustic marker. Linguistic and multimodal annotations, performed on the synchronised versions of the collected material, will be merged using the ELAN software <ref type="bibr" target="#b16">[17]</ref>. An ELAN project file will be produced for each POI visit in order to allow cross-domain research and closed vocabularies for the label sets belonging to each annotation domain will be used to ensure consistency. An example of the ELAN interface showing the two video shots and a sample annotation tier is shown in Figure <ref type="figure" target="#fig_0">1</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3">MULTIMODAL ANNOTATION</head><p>The video recording of the expert gatekeeper is annotated as to the structure of verbal discourse and to body communicative behaviour. The discourse structure point of view is based on a previous analysis on videos of Art Commentators (ACs), that is, both museum gatekeepers and art historians illustrating artworks on TV, where a general script was extracted of what the AC can/should say in one's work. This allowed us to outline the typical discourse structure of any AC which, based on the analysis of discourse as a hierarchy of goals <ref type="bibr" target="#b7">[8]</ref>, distinguishes four main goals pursued by the gatekeeper: a general goal of cultural elevation, encompassing favouring aesthetic enjoyment, imagination and emotion triggering, and, subsumed to it, the textual goals of providing information about the opera, its history, function, cultural milieu, and the author; the corresponding modal goals of attracting and sustaining attention, favouring comprehension and inferential connections with the tourists' previous knowledge; interactional goals such as tuning, setting empathic connection with tourists. Each particular performance of a gatekeeper or other AC can be analysed in terms of this abstract script, and this allows, among other things, to distinguish the idiosyncratic styles of different ACs in terms of which nodes of the structure they prefer to expand. Some mainly focus on the author and his life, some on the deep symbolic meanings of the artwork, some on the author's style and the surrounding cultural milieu, and so on. The analysis of the gatekeeper's multimodal communication takes into account the following body communicative modalities: gestures, postures, head movements, facial expression, gaze communication. 
For each communicative item in each modality, the signal is annotated in ELAN in terms of a detailed description of its production: gestures are described according to their parameters of hand configuration, location, orientation and movement; gaze in terms of eye direction, eyebrows and eyelids movements; face in terms of Ekman's FACS; head movements in terms of head nod, shake, toss, canting; postures in terms of leg and trunk movements. Then, for the signal described in this way, a verbal phrasing of its meaning is provided (after <ref type="bibr" target="#b8">[9]</ref>). Based on this meaning, the item is classified as to its role and function within the gatekeeper's discourse structure. An example of multimodal annotation is shown in Table <ref type="table" target="#tab_0">1</ref>. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4">LINGUISTIC ANNOTATION</head><p>Using the close-mic recordings, speech produced by the expert gatekeeper is analysed and annotated on different levels. From the informative-syntactic point of view, an orthographic level is produced on the basis of the indications provided by <ref type="bibr" target="#b10">[11]</ref>. This level involves the transcription of a number of elements: lexical elements, silent and filled pauses, noises, vocal (nonverbal) phenomena, truncated words, interrupted words, false starts and lapsus linguae. A phonetic level is included to store the phonetic transcription of the utterances and markers of phonetic phenomena like coarticulation, following the indications found in <ref type="bibr" target="#b11">[12]</ref>. A syllabic level is produced to allow speech fluency and speech rate analyses. A disfluency level, involving the annotation of disfluency phenomena <ref type="bibr" target="#b2">[3,</ref><ref type="bibr" target="#b12">13]</ref>, is also included. This analysis level consists of four annotation tiers, detailed in Table <ref type="table" target="#tab_1">2</ref>.</p><p>To document the prosodic component of the experts' linguistic behaviour, a multilevel annotation, structured in different tiers, has been produced. 
The considered aspects include: an intonative level, using the INTSINT coding scheme <ref type="bibr" target="#b3">[4,</ref><ref type="bibr" target="#b4">5]</ref>, providing a label sequence representing the f0 curve, obtained with the Prosomarker tool <ref type="bibr" target="#b6">[7]</ref>; a pragmatic-informative level, providing an analysis of information structure considering topic (preposed or postposed) and comment units <ref type="bibr" target="#b1">[2]</ref>; a macro-syntactic level, indicating the types of clauses dividing independent clauses from dependent clauses and specifying the type of subordination; a syntactic level, describing the main syntactic functions; an intra-syntactic level, labelling the type of phrase and its composition (between parentheses); a measure of syntactic weight, based on <ref type="bibr" target="#b15">[16]</ref>, which takes into account both the structure and the length of constituents. It considers the following features: ± presence of determiners, ± presence of modifiers, ± presence of pronouns, ± verbal valency saturation. An annotation example is shown in Figure <ref type="figure" target="#fig_2">2</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5">CONCLUSIONS AND FUTURE WORK</head><p>We have presented the data collection and annotation protocols for a work in progress on an audiovisual corpus documenting how cultural heritage gatekeepers support people in accessing architectural heritage. The corpus consists of both video and audio recordings to capture the social interaction process taking place between the group guide and the attending audience. Annotation levels cover linguistic and multimodal aspects of communication to allow a multi-faceted investigation of the ongoing communicative process.</p><p>The collected material will be used as reference to build a computational model of a 3D virtual character presenting reconstructions of architectural heritage sites.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: A screenshot of the ELAN interface showing the synchronised videos of the expert and of the audience, together with an example annotation.</figDesc><graphic coords="2,53.80,83.69,240.24,124.44" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: An example on the utterance I certosini devono la fondazione del loro ordine a un uomo (Carthusians owe their order's foundation to a man). The order of the annotation tiers is the one found in the text.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1 :</head><label>1</label><figDesc>An example of multimodality annotation.</figDesc><table><row><cell>Verbal Text</cell><cell>Discourse function</cell><cell>Gesture</cell><cell>Meaning</cell></row><row><cell>The Saint Martin's Charterhouse here in Naples has at least two souls</cell><cell>Textual goal: Information on the artwork</cell><cell>hands, palms to each other, like framing something</cell><cell>I am framing the object of discourse, Metacognitive gesture</cell></row><row><cell>Nowadays it is not only a Charterhouse</cell><cell>Textual goal: Information on the identity of the artwork</cell><cell>Left hand moves to left, Metadiscursive gesture</cell><cell>I locate the identity Charterhouse on my left → I build the first entity</cell></row><row><cell>but it is also a national museum</cell><cell>Textual goal: Information on the identity of the artwork</cell><cell>Right hand moves to right. Metadiscursive gesture</cell><cell>I locate the identity known as Museum on my right -&gt; I build the second entity</cell></row><row><cell>So try to imagine Naples 700 years ago</cell><cell>Emotional goal: Solicit imagination</cell><cell>-</cell><cell>-</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2 :</head><label>2</label><figDesc>Disfluency annotation levels</figDesc><table><row><cell>Disfluency</cell><cell>Type of disfluent</cell></row><row><cell>Type</cell><cell>phenomenon</cell></row><row><cell>Disfluency</cell><cell>Pragmatic function of</cell></row><row><cell>Function</cell><cell>the disfluent phenomenon</cell></row><row><cell>Disfluency Model</cell><cell>Model of occurrence</cell></row><row><cell>Disfluency</cell><cell>Internal regions of</cell></row><row><cell>Components</cell><cell>the phenomenon</cell></row></table></figure>
		</body>
		<back>

			<div type="acknowledgement">
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6">ACKNOWLEDGMENTS</head><p>Antonio Origlia's work is funded by the Italian PRIN project Cultural Heritage Resources Orienting Multimodal Experiences (CHROME) #B52F15000450001.</p></div>
			</div>

			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">Intercultural dynamics of first acquaintance: comparative study of Swedish, Chinese and Swedish-Chinese first time encounters</title>
		<author>
			<persName><forename type="first">Jens</forename><surname>Allwood</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Nataliya</forename><surname>Berbyuk Lindström</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jia</forename><surname>Lu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">International Conference on Universal Access in Human-Computer Interaction</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="12" to="21" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Universals of topic-comment structure</title>
		<author>
			<persName><forename type="first">Jeanette</forename><forename type="middle">K</forename><surname>Gundel</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Studies in syntactic typology</title>
		<imprint>
			<biblScope unit="volume">17</biblScope>
			<biblScope unit="page" from="209" to="239" />
			<date type="published" when="1988">1988</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">A content-processing view of hesitation phenomena</title>
		<author>
			<persName><forename type="first">Adolf</forename><forename type="middle">E</forename><surname>Hieke</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Language and Speech</title>
		<imprint>
			<biblScope unit="volume">24</biblScope>
			<biblScope unit="issue">2</biblScope>
			<biblScope unit="page" from="147" to="160" />
			<date type="published" when="1981">1981</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">A survey of intonation systems</title>
		<author>
			<persName><forename type="first">Daniel</forename><surname>Hirst</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Albert</forename><surname>Di Cristo</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Intonation systems: A survey of twenty languages</title>
				<imprint>
			<date type="published" when="1998">1998</date>
			<biblScope unit="page" from="1" to="44" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Levels of representation and levels of analysis for the description of intonation systems</title>
		<author>
			<persName><forename type="first">Daniel</forename><surname>Hirst</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Albert</forename><surname>Di Cristo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Robert</forename><surname>Espesser</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Prosody: Theory and experiment</title>
				<imprint>
			<publisher>Springer</publisher>
			<date type="published" when="2000">2000</date>
			<biblScope unit="page" from="51" to="87" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">The AMI meeting corpus</title>
		<author>
			<persName><forename type="first">Iain</forename><surname>McCowan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Jean</forename><surname>Carletta</surname></persName>
		</author>
		<author>
			<persName><surname>Kraaij</surname></persName>
		</author>
		<author>
			<persName><surname>Ashby</surname></persName>
		</author>
		<author>
			<persName><surname>Bourban</surname></persName>
		</author>
		<author>
			<persName><surname>Flynn</surname></persName>
		</author>
		<author>
			<persName><surname>Guillemot</surname></persName>
		</author>
		<author>
			<persName><surname>Hain</surname></persName>
		</author>
		<author>
			<persName><surname>Kadlec</surname></persName>
		</author>
		<author>
			<persName><surname>Karaiskos</surname></persName>
		</author>
		<author>
			<persName><surname>Others</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 5th International Conference on Methods and Techniques in Behavioral Research</title>
				<meeting>the 5th International Conference on Methods and Techniques in Behavioral Research</meeting>
		<imprint>
			<date type="published" when="2005">2005</date>
			<biblScope unit="volume">88</biblScope>
			<biblScope unit="page">100</biblScope>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Prosomarker: a prosodic analysis tool based on optimal pitch stylization and automatic syllabification</title>
		<author>
			<persName><forename type="first">Antonio</forename><surname>Origlia</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Iolanda</forename><surname>Alfano</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the International Conference on Language Resources and Evaluation (LREC)</title>
				<meeting>of the International Conference on Language Resources and Evaluation (LREC)</meeting>
		<imprint>
			<date type="published" when="2012">2012</date>
			<biblScope unit="page" from="997" to="1002" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">The discourse as a hierarchy of goals</title>
		<author>
			<persName><forename type="first">Domenico</forename><surname>Parisi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Cristiano</forename><surname>Castelfranchi</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Centro Internazionale di Semiotica e di Linguistica</title>
				<imprint>
			<date type="published" when="1976">1976</date>
		</imprint>
		<respStmt>
			<orgName>Università di Urbino</orgName>
		</respStmt>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<monogr>
		<title level="m" type="main">Mind, hands, face and body: a goal and belief view of multimodal communication</title>
		<author>
			<persName><forename type="first">Isabella</forename><surname>Poggi</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2007">2007</date>
			<publisher>Weidler</publisher>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">Automatic detection of laughter and fillers in spontaneous mobile phone conversations</title>
		<author>
			<persName><forename type="first">Hugues</forename><surname>Salamin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Anna</forename><surname>Polychroniou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alessandro</forename><surname>Vinciarelli</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">IEEE International Conference on. IEEE</title>
				<imprint>
			<date type="published" when="2013">2013</date>
			<biblScope unit="page" from="4282" to="4287" />
		</imprint>
	</monogr>
	<note>Systems, Man, and Cybernetics (SMC)</note>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Specifiche per la trascrizione ortografica annotata dei testi</title>
		<author>
			<persName><forename type="first">Renata</forename><surname>Savy</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Italiano Parlato</title>
				<imprint>
			<date type="published" when="2005">2005</date>
			<biblScope unit="page" from="1" to="28" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<monogr>
		<author>
			<persName><forename type="first">Renata</forename><surname>Savy</surname></persName>
		</author>
		<title level="m">Specifiche per l&apos;etichettatura dei livelli segmentali</title>
				<meeting><address><addrLine>Napoli</addrLine></address></meeting>
		<imprint>
			<publisher>Liguori</publisher>
			<date type="published" when="2005">2005</date>
		</imprint>
	</monogr>
	<note>Italiano Parlato. Analisi di un dialogo</note>
</biblStruct>

<biblStruct xml:id="b12">
	<monogr>
		<title level="m" type="main">Preliminaries to a theory of speech disfluencies</title>
		<author>
			<persName><forename type="first">Elizabeth</forename><forename type="middle">Ellen</forename><surname>Shriberg</surname></persName>
		</author>
		<imprint>
			<date type="published" when="1994">1994</date>
			<publisher>Citeseer</publisher>
		</imprint>
	</monogr>
	<note type="report_type">Ph.D. Dissertation</note>
</biblStruct>

<!-- b13: conference paper (2015), pp. 823 to 827. The proceedings title is
     reassembled from the inverted citation form ("Digital Signal Processing
     (DSP), ... IEEE International Conference on. IEEE"); the trailing "IEEE"
     is recorded as the publisher. -->
<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Real-time sociometrics from audio-visual features for two-person dialogs</title>
		<author>
			<persName><forename type="first">Yasir</forename><surname>Tahir</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Debsubhra</forename><surname>Chakraborty</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Tomasz</forename><surname>Maszczyk</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Shoko</forename><surname>Dauwels</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Justin</forename><surname>Dauwels</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Nadia</forename><surname>Thalmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Daniel</forename><surname>Thalmann</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">IEEE International Conference on Digital Signal Processing (DSP)</title>
		<imprint>
			<publisher>IEEE</publisher>
			<date type="published" when="2015">2015</date>
			<biblScope unit="page" from="823" to="827" />
		</imprint>
	</monogr>
</biblStruct>

<!-- b14: journal article, volume 27, issue 12, pp. 1743 to 1759 (2009).
     The date text carries the year once, matching the @when attribute. -->
<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Social signal processing: Survey of an emerging domain</title>
		<author>
			<persName><forename type="first">Alessandro</forename><surname>Vinciarelli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Maja</forename><surname>Pantic</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Hervé</forename><surname>Bourlard</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Image and vision computing</title>
		<imprint>
			<biblScope unit="volume">27</biblScope>
			<biblScope unit="issue">12</biblScope>
			<biblScope unit="page" from="1743" to="1759" />
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
</biblStruct>

<!-- b15: paper in conference proceedings (2007). The meeting name omits the
     "Proc. of" prefix of the proceedings title. "Liguori, Napoli" is recorded
     as publisher and place of publication in the imprint (presumably the
     publishing house and its city, as in entry b11; verify against the
     printed reference). -->
<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Il peso del parlare e dello scrivere</title>
		<author>
			<persName><forename type="first">Miriam</forename><surname>Voghera</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Giuseppina</forename><surname>Turco</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of International Conf. Il Parlato Italiano</title>
		<meeting>International Conf. Il Parlato Italiano</meeting>
		<imprint>
			<pubPlace>Napoli</pubPlace>
			<publisher>Liguori</publisher>
			<date type="published" when="2007">2007</date>
		</imprint>
	</monogr>
</biblStruct>

<!-- b16: paper in the LREC proceedings (2006), pp. 1556 to 1559. The meeting
     name omits the "Proc. of the" prefix of the proceedings title. -->
<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">ELAN: a professional framework for multimodality research</title>
		<author>
			<persName><forename type="first">Peter</forename><surname>Wittenburg</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Hennie</forename><surname>Brugman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Albert</forename><surname>Russel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Alex</forename><surname>Klassmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Han</forename><surname>Sloetjes</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proc. of the International Conference on Language Resources and Evaluation (LREC)</title>
		<meeting>International Conference on Language Resources and Evaluation (LREC)</meeting>
		<imprint>
			<date type="published" when="2006">2006</date>
			<biblScope unit="page" from="1556" to="1559" />
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
