<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Leveraging Prompt Engineering and Large Language Models for Automating MADRS Score Computation for Depression Severity Assessment</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Alessandro</forename><surname>Raganato</surname></persName>
							<email>alessandro.raganato@unimib.it</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Informatics, Systems, and Communication</orgName>
								<orgName type="institution">University of Milano-Bicocca</orgName>
								<address>
									<settlement>Milan</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Francesco</forename><surname>Bartoli</surname></persName>
							<email>francesco.bartoli@unimib.it</email>
							<affiliation key="aff1">
								<orgName type="department">School of Medicine and Surgery</orgName>
								<orgName type="institution">University of Milano-Bicocca</orgName>
								<address>
									<settlement>Monza</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Cristina</forename><surname>Crocamo</surname></persName>
							<email>cristina.crocamo@unimib.it</email>
							<affiliation key="aff1">
								<orgName type="department">School of Medicine and Surgery</orgName>
								<orgName type="institution">University of Milano-Bicocca</orgName>
								<address>
									<settlement>Monza</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Daniele</forename><surname>Cavaleri</surname></persName>
							<email>d.cavaleri1@campus.unimib.it</email>
							<affiliation key="aff1">
								<orgName type="department">School of Medicine and Surgery</orgName>
								<orgName type="institution">University of Milano-Bicocca</orgName>
								<address>
									<settlement>Monza</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Giuseppe</forename><surname>Carrà</surname></persName>
							<email>giuseppe.carra@unimib.it</email>
							<affiliation key="aff1">
								<orgName type="department">School of Medicine and Surgery</orgName>
								<orgName type="institution">University of Milano-Bicocca</orgName>
								<address>
									<settlement>Monza</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
							<affiliation key="aff2">
								<orgName type="department">Division of Psychiatry</orgName>
								<orgName type="institution">University College London</orgName>
								<address>
									<settlement>London</settlement>
									<country key="GB">UK</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Gabriella</forename><surname>Pasi</surname></persName>
							<email>gabriella.pasi@unimib.it</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Informatics, Systems, and Communication</orgName>
								<orgName type="institution">University of Milano-Bicocca</orgName>
								<address>
									<settlement>Milan</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Marco</forename><surname>Viviani</surname></persName>
							<email>marco.viviani@unimib.it</email>
							<affiliation key="aff0">
								<orgName type="department">Department of Informatics, Systems, and Communication</orgName>
								<orgName type="institution">University of Milano-Bicocca</orgName>
								<address>
									<settlement>Milan</settlement>
									<country key="IT">Italy</country>
								</address>
							</affiliation>
						</author>
						<title level="a" type="main">Leveraging Prompt Engineering and Large Language Models for Automating MADRS Score Computation for Depression Severity Assessment</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">EC8F3E74965911B63B6BBF5AD3A834F8</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T16:57+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>Mental Health</term>
					<term>MADRS</term>
					<term>Prompt Engineering</term>
					<term>Large Language Models</term>
					<term>Natural Language Processing</term>
					<term>Orcid 0000-0002-7018-7515 (A. Raganato)</term>
					<term>0000-0003-2612-4119 (F. Bartoli)</term>
					<term>0000-0002-2979-2107 (C. Crocamo)</term>
					<term>0000-0001-5342-9394 (D. Cavaleri)</term>
					<term>0000-0002-6877-6169 (G. Carrà)</term>
					<term>0000-0002-6080-8170 (G. Pasi)</term>
					<term>0000-0002-2274-9050 (M. Viviani)</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>This study ventures into the field of psychiatry by investigating the interactive dynamics between psychiatrists and their patients. The primary goal is to create an automated scoring mechanism using prompt engineering techniques applied to Large Language Models (LLMs) to assess the severity of depressive symptoms from these dialogues. In particular, the process of generating a depression severity score against MADRS, a rating scale widely used in psychiatry, is automated. This work aims to highlight the potential of using these techniques to improve traditional diagnostic approaches in psychiatry. The results that have emerged, while not optimal, are promising, including for the purpose of developing a full-fledged system in the future to enable the introduction of more targeted and timely interventions, thereby improving patient outcomes and improving the overall level of mental health.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>The assessment of symptom severity plays a crucial role in the clinical management of mental disorders, being pivotal in diagnosing and monitoring the mental well-being of patients <ref type="bibr" target="#b0">[1]</ref>. Traditionally, this evaluation has heavily relied on clinical experience, sometimes supported by questionnaires and rating scales during in-person visits. However, advancements in Machine Learning (ML) and Natural Language Processing (NLP) techniques offer the potential for automated systems that can support in assessing measures of symptom severity in dialogues between psychiatrists and the growing number of patients. In particular, the evolving landscape of prompt engineering techniques applied to Large Language Models (LLMs) presents a novel avenue for developing such kind of systems, to better support psychiatric assessment practices in the future. This study, in particular, embarks on the task of automatically mapping psychiatrist-patient dialogue content to the Montgomery-Åsberg Depression Rating Scale (MADRS) <ref type="bibr" target="#b1">[2]</ref>, a widely accepted instrument for evaluating depression severity, through the potential of recently developed generative Artificial Intelligence (AI) models <ref type="bibr" target="#b2">[3]</ref>. To establish a foundation, a manual mapping process performed by clinical experts is employed to establish connections between question-answers from some psychiatrist-patient dialogues and the corresponding items of the MADRS questionnaire, together with the corresponding scores (both at the individual item level and the global level). This manual mapping serves as a benchmark for subsequent comparison with results obtained from the considered AI-based approaches.</p><p>In a first approach, distinct prompt engineering techniques applied to LLMs are leveraged to compute depression severity scores for each MADRS item. 
Each item is devoted to assessing a different symptom domain, such as sadness, inner tension, reduced sleep, etc., rated on a scale from 0 to 6, with higher scores indicating more severe depressive symptoms. The computed scores are then further aggregated to provide an overall assessment, ranging from 0 to 60, with higher scores indicating more severe depression. In a second approach, we evaluate the effectiveness of using prompts to directly compute the overall depression severity score.</p><p>This study serves as a preliminary step to explore the feasibility, in the future, of creating an advanced conversational system that generates questions and analyses responses to automatically assess symptom severity levels. The obtained results illustrate that the proposed approaches and the best models tested have an accuracy of about 70% in making the mapping between conversation and MADRS scores, with a pretty high correlation. While not optimal, this result appears encouraging in the belief that refinements on the models (via fine-tuning) and prompts could lead to higher results and pursuit of the goal of developing a fully automated system.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Related Work</head><p>The urgent need for innovation around access and quality of mental health care has become clear in the last few years <ref type="bibr" target="#b3">[4]</ref>. More and more mental health-related digital strategies for therapeutic approaches have been offered via ML and, in general, AI models, thus contributing to the development of detection systems for mental disorders, e.g., <ref type="bibr" target="#b4">[5,</ref><ref type="bibr" target="#b5">6,</ref><ref type="bibr" target="#b6">7]</ref>.</p><p>However, although significant progress has been made in the field, there are several barriers in the implementation of detection systems in real-world applications, including a need for increased transparency and replication <ref type="bibr" target="#b7">[8]</ref>. Moreover, the literature is sparse with a high degree of heterogeneity between studies and the use of nonstandardized metrics reporting <ref type="bibr" target="#b8">[9]</ref>. In addition, several areas remain understudied, including the use of these approaches among people suffering from mental disorders such as depression. Nonetheless, a few studies analyzed automated approaches for evaluating depression.</p><p>A recent study trained ML models to diagnose depression from spontaneous responses of 113 outpatients using interviews by experienced physicians that were first audio-recorded and transcribed verbatim. The study showed automated depression diagnosis based on interviews as a feasible approach <ref type="bibr" target="#b9">[10]</ref>. The use of transcribed autobiographical memory interviews was also considered for patients with treatment-resistant depression treated with psilocybin <ref type="bibr" target="#b10">[11]</ref>. 
Quantitative speech measures were computed using the interview data from 17 patients and 18 untreated age-matched healthy control subjects, and an ML algorithm was developed to classify between controls and patients and predict treatment response. Results showed that speech analytics and ML successfully differentiated individuals with depression from healthy controls and identified treatment responders from nonresponders with a significant level of accuracy and precision. More generally, question-based computational language assessment, based on self-reported and freely generated word responses, analyzed with AI, has been shown as a potential tool that may complement rating scales and evaluate mental health issues in clinical settings <ref type="bibr" target="#b11">[12]</ref>. A recent systematic review highlighted preliminary favorable evidence about the use of conversational agents (i.e., tools providing feedback to user input related to wellbeing and mental health queries) and their promising role in screening, assessment, diagnosis, and treatment of mental disorders, including the effective identification of people with depressive symptoms <ref type="bibr" target="#b7">[8,</ref><ref type="bibr" target="#b12">13,</ref><ref type="bibr" target="#b13">14]</ref>. For instance, discreet text interfaces possibly allowed participants to feel more comfortable using conversational agents in public <ref type="bibr" target="#b14">[15]</ref>.</p><p>Although these approaches appear to ensure optimal control over conversation flow and topics benefiting users and providers, a pre-defined response range may decrease usability in a diverse range of clinical settings with different risks such as possibly disrupting the therapeutic alliance <ref type="bibr" target="#b14">[15]</ref>. 
Indeed, a feasible option for developing a mass screening integrated approach for early detection of depression is intended as a means of assisting with automation and concealed communication with verified scoring systems rather than replacing clinical interviews <ref type="bibr" target="#b15">[16]</ref>. Moreover, the diversity of outcomes and the choice of outcome measurement instruments employed in studies on conversational agents for mental health point to the need for an established minimum core outcome set and greater use of validated instruments <ref type="bibr" target="#b16">[17]</ref>. Therefore, an enhanced personalization of conversational agents leveraging the interdisciplinary use of NLP techniques to better understand the context of the conversation about vulnerable experiences related to depressive symptoms -with a more human-like approach -appears desirable <ref type="bibr" target="#b17">[18]</ref>.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Guiding LLMs to Automate MADRS Score Computation</head><p>LLMs are advanced AI systems <ref type="bibr" target="#b18">[19]</ref>, which possess the capability to generate human-like text across a wide range of topics, and thus seem to be the most suitable tool for solving the problem identified in the literature above. However, to accomplish a particular task, there is the need for a process for crafting specific instructions or prompts to guide these models; such a process is known as prompt engineering <ref type="bibr" target="#b19">[20]</ref>, and has been gaining importance in recent years in medicine <ref type="bibr" target="#b20">[21]</ref>.</p></div>
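<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.1.">Basics of Prompt Engineering</head><p>The main prompting techniques employed today in the literature are known as Zero-Shot (ZS), Few-Shot (FS), and Chain-of-Thought (CoT) learning. In ZS learning, the LLM is provided with a prompt (describing the task to be accomplished) without any examples or specific training data for that task. Despite this, the model attempts to generate a suitable response based solely on its understanding of the task description. FS learning extends ZS by providing the model with a small number of examples or demonstrations for the task at hand. These examples serve as additional context for the model to understand the task better. Finally, CoT prompts guide the model to generate coherent and logically connected responses by sequentially structuring the prompt. Each step of the prompt builds upon the previous one, creating a chain of thoughts that guide the model's generation process.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.">Automated Score Computation</head><p>Having made this necessary premise about prompt engineering, we can illustrate the two different approaches proposed in this article to perform the considered task, denoted as local and global. For both approaches, we consider ZS and CoT prompting techniques, being insufficient in the number of available examples in the considered dataset (detailed in Section 4.1) to perform FS. This means designing appropriate prompt templates for each prompting technique with respect to each approach.</p></div>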
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.1.">Local Computation Approach</head><p>We ask LLMs appropriately guided by prompts to generate a score for each item of the MADRS. Such items and their descriptions are illustrated in Figure <ref type="figure" target="#fig_0">1</ref>, while ZS and CoT prompt templates are detailed in the following. Zero-Shot Learning. The model is simply asked to generate a score for each item of the MADRS. These items are specified in the template, as follows:</p><p>Given the following document containing a conversation between a physician and a patient, denoted by M and P respectively, following the Montgomery-Åsberg Depression Rating Scale (MADRS), answer me with the severity score, from a minimum of 0 (symptom absent) to a maximum of 6 (extremely severe), for the following item only: [item title, description]. Answer me only with a value between a minimum of 0 and a maximum of 6 related only to the described label. Below is the document to be analyzed:</p><formula xml:id="formula_0">[document].</formula><p>This template is repeated for each of the 10 items of MADRS, and [item title, description] contains the title and description shown in Figure <ref type="figure" target="#fig_0">1</ref> for each item, for example: Reduced sleep, representing the experience of reduced duration or depth of sleep compared to the subject's own normal pattern when well. Once the scores for each item are obtained, they are simply added together to obtain the overall score.</p><p>CoT Learning. In this preliminary work, the CoT approach is based on simply asking the model to provide a motivation before performing the task. This helps the model make a more informed decision than the ZS scenario. Therefore, the CoT template used is as follows:</p><p>[ZS "local" template] + Provide the rationale before answering.</p><p>Also in this case, the scores for each item are summed up to obtain the overall score.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.2.2.">Global Computation Approach</head><p>Here, LLMs are appropriately guided to directly generate the overall depression score with respect to MADRS.</p><p>Zero-Shot Learning. The ZS template employed in this global approach to computation is as follows:</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Given the following document containing a conversation between a physician and a patient, denoted by M and P respectively, following the Montgomery-Åsberg Depression Rating Scale (MADRS), answer me with what would be the severity score with respect to depression that you would assign. The threshold values are: 0 to 6 no depression, 7 to 19 mild depression, 20 to 34 moderate depression, and 35 to 60 severe depression. Answer only with a value between the minimum of 0 and a maximum of 60. Below is the document to be analyzed: [document].</p><p>CoT Learning. CoT learning in the global approach uses the ZS "global" template in which reasoning is required before providing the answer:</p><p>[ZS "global" template] + Provide the rationale before answering.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Comparative Evaluation</head><p>In this section, we present the results of the comparative evaluation of the local and global approaches, in relation to the various proposed prompt engineering techniques (and thus, regarding the different templates used). Firstly, we introduce the dataset employed in the evaluations and the technical characteristics of the implemented models.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">The Conversation Dataset</head><p>It is well understood, especially in such a delicate field as psychiatry, that dealing with patient data is rather complex and ethically sensitive. For this reason, for this preliminary study, a team of medical experts generated a small dataset in which clinicians took on the roles of both the doctor and the patient. This was done to create typical conversations regarding various levels of depression severity, namely: severe depression, moderate depression, mild depression, and absence of depression. In total, 10 doctor-patient conversations were generated in Italian, with at least 3 conversations for the first three previously outlined severity levels. Clinicians also labeled the questions and answers against the corresponding items of the MADRS and provided both item-level and global scores for the entire conversation.<ref type="foot" target="#foot_0">1</ref> </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Technical Details</head><p>To assess the effectiveness of generative models in addressing the considered problem, various LLMs were tested. These models were trained on diverse datasets, tailored for a multilingual context, given that our psychiatrist-patient conversations are in Italian. In particular, the following models were used: GPT-3.5: GPT-3.5-turbo-0613, it is an iteration of the Generative Pretrained Transformer (GPT) model developed by OpenAI. It is an advanced version of its predecessor, GPT-3, with improvements in various aspects such as model architecture, training data, and fine-tuning techniques. GPT-4: GPT-4-0613, it is a large multimodal model (accepting image and text inputs, emitting text outputs).<ref type="foot" target="#foot_1">2</ref> Mistral: Mistral-7B-Instruct-v0.2, it is an instruct fine-tuned 7B LLM, trained mainly on English data, but also acquainted with Italian during its pretraining phase <ref type="bibr" target="#b21">[22]</ref>. Mixtral: Mixtral-8x7B-Instruct-v0.1, it is a pretrained generative Sparse Mixture of Experts model, trained mainly on 5 languages including Italian. It has 46.7B total parameters but only uses 12.9B parameters per token.<ref type="foot" target="#foot_2">3</ref> Dante: DanteLLM_instruct_7b-v0.2-boosted, it is a recent state-of-the-art Italian LLM based on the 7B Mistral model. <ref type="foot" target="#foot_3">4</ref> Hermes: Hermes7b_ITA, it is a 7B LLM trained on a 120K instruction/answer dataset in Italian. It is based on Nous-Hermes-llama-2-7b LLM, a version of meta/Llama-2-7b fine-tuned to follow instructions. <ref type="foot" target="#foot_4">5</ref></p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.">Results</head><p>The results obtained measure the effectiveness of the above-mentioned models, in conjunction with the appropriate prompting templates, in correctly predicting the item-level scores and overall score of each conversation compared with those assigned by the medical experts. They are illustrated in terms of accuracy (Acc.), Pearson (P.), and Spearman (S.) correlation coefficients.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.1.">Local Computation Results</head><p>Tables <ref type="table" target="#tab_1">1 and 2</ref> show some results of the prompts and LLM models applied to the local computation approach. It can be seen from the results in Table <ref type="table" target="#tab_1">1</ref> that, especially in terms of accuracy, the local approach does not provide satisfactory overall results. However, a substantial improvement can be appreciated when models are asked to explain the reasons for their choices (CoT), and in particular for the Hermes model. Regarding the Pearson and Spearman correlation coefficients, we can observe how these are globally quite high, improving in the CoT scenario for models trained on larger amounts of data and decreasing on smaller ones.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Table 2</head><p>Correlation results for each MADRS item in the local CoT scenario. GPT-3.5 0.61 0.80 0.35 0.24 0.48 0.56 0.73 0.81 0.74 0.79 0.60 0.66 0.54 0.58 0.17 0.24 0.31 0.41 0.83 0.87 GPT-4 0.65 0.51 0.61 0.50 0.70 0.67 0.89 0.79 0.90 0.89 0.18 0.36 0.83 0.76 0.47 0.37 0.84 0.83 0.95 0.96 Mistral 0.15 0.20 0.64 0.78 0.53 0.21 0.71 0.79 0.21 0.20 0.40 0.54 -0.34 -0.37 0.31 0.31 0.82 0.82 0.94 0.93 Mixtral 0.46 0.48 0.91 0.88 0.73 0.43 0.76 0.69 0.84 0.90 0.21 0.35 0.72 0.64 -0.52 -0.36 0.36 0.39 0.83 0.87 Dante -0.49 0.49 0.66 0.68 0.75 0.47 0.50 -0.78 -0.76 -0.08 -0.08 -0.25 -0.05 -0.04 0.09 0.11 0.11 0.24 0.25 Hermes 0.57 0.56 -0.25 -0.61 0.06 0.24 0.07 0.01 -0.16 -0.22 -0.25 -0.32 0.30 0.17 0.24 0.16 0.18 0.29 -0.02 0.22</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.2.">Global Computation Results</head><p>Table <ref type="table" target="#tab_2">3</ref> shows the results of the prompts and LLM models applied to the global computation approach. The results in this case show that an accuracy of around 70% can be achieved. It is particularly interesting to note how the best models are the GPT-based ones in the ZS case, while it is Dante in the CoT case, which instead turns out to be one of the worst using a ZS technique. Pearson and Spearman correlation coefficient results illustrate a significant increase in correlation in the smaller models in the CoT scenario, with variable fluctuations in the case of the larger models.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.3.3.">Further Investigating Best Results</head><p>Considering the approaches, prompt engineering techniques, and LLMs tested, it is clear that the use of the global approach is superior to the local one. This would seem to suggest that LLMs have a greater chance of success with respect to the task considered when the conversation is considered to produce the global MADRS score, without the model being asked to generate MADRS item-based scores to be later aggregated. However, we operated in a context in which we did not provide specific examples to the model according to a Few-Shot strategy, which needs to be investigated in the future.</p><p>As it emerges from Table <ref type="table">2</ref>, referring to the local computation approach in the CoT scenario, the correlation with respect to the scores predicted in the individual items is generally not very high, although it is objectively better in some specific items such as #4 (i.e., reduced sleep, for the models trained on more data) and #10 (i.e., suicidal thoughts, again for larger models). The smaller, Italian-specific models do not correlate well on this task.</p><p>Concerning Figure <ref type="figure" target="#fig_1">2</ref>, illustrating the confusion matrix referring to the global computation approach for the Dante model performed in the CoT scenario, we can observe how the model does not confuse depression severity classes that are too distant from each other. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Conclusion and Future Research</head><p>This study explored the utilization of generative Artificial Intelligence (AI) models for automatically mapping psychiatrist-patient dialogue content to the Montgomery-Åsberg Depression Rating Scale (MADRS). Two distinct approaches were investigated: the application of prompt engineering techniques to compute symptom severity scores for each MADRS item, and the direct calculation of the overall depression severity score. The results demonstrated that the proposed approaches, coupled with the best-performing models, achieved an accuracy of approximately 70% in mapping conversations to MADRS scores.</p><p>Though the current accuracy shows promise, there is room for improvement. Future studies could refine models, improve prompt techniques, explore new methods, and use more data sources. This could lead to an automated system that generates questions and evaluates symptom severity from dialogue analysis.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: A detail on the 10 items, with related descriptions, that constitute the MADRS.</figDesc><graphic coords="3,89.29,370.17,203.36,269.99" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>Figure 2 :</head><label>2</label><figDesc>Figure 2: Dante's CoT global confusion matrix.</figDesc><graphic coords="5,302.62,335.21,203.36,152.52" type="bitmap" /></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head></head><label></label><figDesc>In ZS learning, the LLM is provided with a prompt (describing the task to be accomplished) without any examples or specific training data for that task. Despite this, the model attempts to generate a suitable response based solely on its understanding of the task description. FS learning extends ZS by providing the model with a small number of examples or demonstrations for the task at hand. These examples serve as additional context for the model to understand the task better. Finally, CoT prompts guide the model to generate coherent and logically connected responses by sequentially structuring the prompt. Each step of the prompt builds upon the previous one, creating a chain of thoughts that guide the model's generation process. Having made this necessary premise about prompt engineering, we can illustrate the two different approaches proposed in this article to perform the considered task, denoted as local and global. For both approaches, we consider ZS and CoT prompting techniques, being insufficient in the number of available examples in the considered dataset (detailed in Section 4.1) to perform FS.</figDesc><table><row><cell>3.2. Automated Score Computation</cell></row></table><note>main prompting techniques employed today in the literature are known as Zero-Shot (ZS), Few-Shot (FS), and Chain-of-Thought (CoT) learning.</note></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 1</head><label>1</label><figDesc>Overall results for the local computation approach.</figDesc><table><row><cell></cell><cell></cell><cell>ZS</cell><cell></cell><cell></cell><cell>CoT</cell><cell></cell></row><row><cell>Model</cell><cell>Acc.</cell><cell>P.</cell><cell>S.</cell><cell>Acc.</cell><cell>P.</cell><cell>S.</cell></row><row><cell>GPT-3.5</cell><cell>0.30</cell><cell>0.81</cell><cell>0.81</cell><cell>0.30</cell><cell>0.86</cell><cell>0.83</cell></row><row><cell>GPT-4</cell><cell>0.30</cell><cell>0.92</cell><cell>0.88</cell><cell>0.40</cell><cell>0.93</cell><cell>0.90</cell></row><row><cell>Mistral</cell><cell>0.30</cell><cell>0.70</cell><cell>0.69</cell><cell>0.40</cell><cell>0.85</cell><cell>0.91</cell></row><row><cell>Mixtral</cell><cell>0.40</cell><cell>0.92</cell><cell>0.91</cell><cell>0.40</cell><cell>0.86</cell><cell>0.87</cell></row><row><cell>Dante</cell><cell>0.30</cell><cell>0.47</cell><cell>0.42</cell><cell>0.40</cell><cell>0.27</cell><cell>0.16</cell></row><row><cell>Hermes</cell><cell>0.40</cell><cell>0.51</cell><cell>0.54</cell><cell>0.60</cell><cell>0.31</cell><cell>0.15</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_2"><head>Table 3</head><label>3</label><figDesc>Overall results for the global computation approach.</figDesc><table><row><cell></cell><cell></cell><cell>ZS</cell><cell></cell><cell></cell><cell>CoT</cell><cell></cell></row><row><cell>Model</cell><cell>Acc.</cell><cell>P.</cell><cell>S.</cell><cell>Acc.</cell><cell>P.</cell><cell>S.</cell></row><row><cell>GPT-3.5</cell><cell>0.70</cell><cell>0.66</cell><cell>0.62</cell><cell>0.60</cell><cell>0.79</cell><cell>0.71</cell></row><row><cell>GPT-4</cell><cell>0.60</cell><cell>0.96</cell><cell>0.94</cell><cell>0.40</cell><cell>0.87</cell><cell>0.82</cell></row><row><cell>Mistral</cell><cell>0.20</cell><cell>0.47</cell><cell>0.23</cell><cell>0.60</cell><cell>0.22</cell><cell>0.51</cell></row><row><cell>Mixtral</cell><cell>0.50</cell><cell>0.43</cell><cell>0.57</cell><cell>0.50</cell><cell>0.33</cell><cell>0.20</cell></row><row><cell>Dante</cell><cell>0.30</cell><cell>-0.03</cell><cell>0.13</cell><cell>0.70</cell><cell>0.68</cell><cell>0.86</cell></row><row><cell>Hermes</cell><cell>0.30</cell><cell>0.31</cell><cell>0.47</cell><cell>0.50</cell><cell>0.76</cell><cell>0.64</cell></row></table></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">The dataset used and the respective labels and scores can be downloaded at the following address: https://drive.google.com/file/d/18HL5v8Hh2GBm1l0dt9Z8cHW0Opy8JgA7/view?usp=sharing.</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">https://platform.openai.com/docs/models/overview</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">https://mistral.ai/news/mixtral-of-experts/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">https://github.com/RSTLess-research/DanteLLM</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">https://huggingface.co/raicrits/Hermes7b_ITA</note>
		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">The american psychiatric association practice guidelines for the psychiatric evaluation of adults</title>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">J</forename><surname>Silverman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Galanter</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Jackson-Triche</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">G</forename><surname>Jacobs</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">W</forename><surname>Lomax</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">B</forename><surname>Riba</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">D</forename><surname>Tong</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">E</forename><surname>Watkins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><forename type="middle">J</forename><surname>Fochtmann</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">S</forename><surname>Rhoads</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">American Journal of Psychiatry</title>
		<imprint>
			<biblScope unit="volume">172</biblScope>
			<biblScope unit="page" from="798" to="802" />
			<date type="published" when="2015">2015</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">The self-reported montgomery-åsberg depression rating scale is a useful evaluative tool in major depressive disorder</title>
		<author>
			<persName><forename type="first">B</forename><surname>Fantino</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Moore</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">BMC psychiatry</title>
		<imprint>
			<biblScope unit="volume">9</biblScope>
			<biblScope unit="page" from="1" to="6" />
			<date type="published" when="2009">2009</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">The potential of generative artificial intelligence across disciplines: Perspectives and future directions</title>
		<author>
			<persName><forename type="first">K.-B</forename><surname>Ooi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><forename type="middle">W.-H</forename><surname>Tan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Al-Emran</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Al-Sharafi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Capatina</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Chakraborty</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><forename type="middle">K</forename><surname>Dwivedi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">T.-L</forename><surname>Huang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">K</forename><surname>Kar</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V.-H</forename><surname>Lee</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Computer Information Systems</title>
		<imprint>
			<biblScope unit="page" from="1" to="32" />
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<analytic>
		<title level="a" type="main">Digital mental health and covid-19: using technology to accelerate the curve on access and quality tomorrow</title>
		<author>
			<persName><forename type="first">J</forename><surname>Torous</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><forename type="middle">J</forename><surname>Myrick</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Rauseo-Ricupero</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Firth</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">JMIR mental health</title>
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<analytic>
		<title level="a" type="main">Machine learning and prediction in psychological assessment</title>
		<author>
			<persName><forename type="first">M</forename><surname>Fokkema</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Iliescu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Greiff</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ziegler</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">European Journal of Psychological Assessment</title>
		<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">A survey of machine learning techniques in physiology based mental stress detection systems</title>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">S</forename><surname>Panicker</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Gayathri</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Biocybernetics and Biomedical Engineering</title>
		<imprint>
			<biblScope unit="volume">39</biblScope>
			<biblScope unit="page" from="444" to="469" />
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">Assessing vulnerability to psychological distress during the covid-19 pandemic through the analysis of microblogging content</title>
		<author>
			<persName><forename type="first">M</forename><surname>Viviani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Crocamo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Mazzola</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Bartoli</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Carrà</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Pasi</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Future Generation Computer Systems</title>
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<analytic>
		<title level="a" type="main">Chatbots and conversational agents in mental health: a review of the psychiatric landscape</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">N</forename><surname>Vaidyam</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Wisniewski</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">D</forename><surname>Halamka</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">S</forename><surname>Kashavan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">B</forename><surname>Torous</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">The Canadian Journal of Psychiatry</title>
		<imprint>
			<biblScope unit="volume">64</biblScope>
			<biblScope unit="page" from="456" to="464" />
			<date type="published" when="2019">2019</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Chatbots in the field of mental health: challenges and opportunities</title>
		<author>
			<persName><forename type="first">A</forename><surname>Viduani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Cosenza</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">M</forename><surname>Araújo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Kieling</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Digital Mental Health: A Practitioner&apos;s Guide</title>
		<imprint>
			<date type="published" when="2023">2023</date>
			<biblScope unit="page" from="133" to="148" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">A systematic review on automated clinical depression diagnosis</title>
		<author>
			<persName><forename type="first">K</forename><surname>Mao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Chen</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">npj Mental Health Research</title>
		<imprint>
			<biblScope unit="volume">2</biblScope>
			<biblScope unit="page">20</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">Natural speech algorithm applied to baseline interview data can predict which patients will respond to psilocybin for treatment-resistant depression</title>
		<author>
			<persName><forename type="first">F</forename><surname>Carrillo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Sigman</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">F</forename><surname>Slezak</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Ashton</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Fitzgerald</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Stroud</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">J</forename><surname>Nutt</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><forename type="middle">L</forename><surname>Carhart-Harris</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of affective disorders</title>
		<imprint>
			<date type="published" when="2018">2018</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Freely generated word responses analyzed with artificial intelligence predict self-reported symptoms of depression, anxiety, and worry</title>
		<author>
			<persName><forename type="first">K</forename><surname>Kjell</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Johnsson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Sikström</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Frontiers in Psychology</title>
		<imprint>
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">Virtual human as a new diagnostic tool, a proof of concept study in the field of major depressive disorders</title>
		<author>
			<persName><forename type="first">P</forename><surname>Philip</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J.-A</forename><surname>Micoulaud-Franchi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><surname>Sagaspe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><surname>de Sevin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Olive</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Bioulac</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Sauteraud</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Scientific reports</title>
		<imprint>
			<biblScope unit="volume">7</biblScope>
			<biblScope unit="page">42656</biblScope>
			<date type="published" when="2017">2017</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b13">
	<analytic>
		<title level="a" type="main">Artificial intelligence chatbot for depression: descriptive study of usage</title>
		<author>
			<persName><forename type="first">G</forename><surname>Dosovitsky</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><forename type="middle">S</forename><surname>Pineda</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><forename type="middle">C</forename><surname>Jacobson</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">E</forename><forename type="middle">L</forename><surname>Bunge</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">JMIR Formative Research</title>
		<imprint>
			<biblScope unit="volume">4</biblScope>
			<biblScope unit="page">e17065</biblScope>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b14">
	<analytic>
		<title level="a" type="main">Changes to the psychiatric chatbot landscape: A systematic review of conversational agents in serious mental illness: Changements du paysage psychiatrique des chatbots: une revue systématique des agents conversationnels dans la maladie mentale sérieuse</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">N</forename><surname>Vaidyam</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Linggonegoro</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Torous</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">The Canadian Journal of Psychiatry</title>
		<imprint>
			<biblScope unit="volume">66</biblScope>
			<biblScope unit="page" from="339" to="348" />
			<date type="published" when="2021">2021</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b15">
	<analytic>
		<title level="a" type="main">Early detection of depression using a conversational ai bot: A non-clinical trial</title>
		<author>
			<persName><forename type="first">P</forename><surname>Kaywan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Ahmed</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Ibaida</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Miao</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Gu</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Plos one</title>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b16">
	<analytic>
		<title level="a" type="main">Evaluating conversational agents for mental health: Scoping review of outcomes and outcome measurement instruments</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">I</forename><surname>Jabir</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Martinengo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Lin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Torous</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Subramaniam</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Tudor Car</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">J Med Internet Res</title>
		<imprint>
			<biblScope unit="volume">25</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b17">
	<analytic>
		<title level="a" type="main">Chatbot features for anxiety and depression: a scoping review</title>
		<author>
			<persName><forename type="first">A</forename><surname>Ahmed</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Hassan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Aziz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">A</forename><surname>Abd-Alrazaq</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Ali</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Alzubaidi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Al-Thani</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Elhusein</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">A</forename><surname>Siddig</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Ahmed</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Health informatics journal</title>
		<imprint>
			<biblScope unit="volume">29</biblScope>
			<biblScope unit="page">14604582221146719</biblScope>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b18">
	<analytic>
		<title level="a" type="main">A survey on evaluation of large language models</title>
		<author>
			<persName><forename type="first">Y</forename><surname>Chang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Wu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Yang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">K</forename><surname>Zhu</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">X</forename><surname>Yi</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Wang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Y</forename><surname>Wang</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">ACM Transactions on Intelligent Systems and Technology</title>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b19">
	<monogr>
		<author>
			<persName><forename type="first">P</forename><surname>Sahoo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">K</forename><surname>Singh</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Saha</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Jain</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Mondal</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Chadha</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2402.07927</idno>
		<title level="m">A systematic survey of prompt engineering in large language models: Techniques and applications</title>
				<imprint>
			<date type="published" when="2024">2024</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b20">
	<analytic>
		<title level="a" type="main">Prompt engineering as an important emerging skill for medical professionals: tutorial</title>
		<author>
			<persName><forename type="first">B</forename><surname>Meskó</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Journal of Medical Internet Research</title>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b21">
	<monogr>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">Q</forename><surname>Jiang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Sablayrolles</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Mensch</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Bamford</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><forename type="middle">S</forename><surname>Chaplot</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>de las Casas</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Bressand</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Lengyel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Lample</surname></persName>
		</author>
		<author>
			<persName><forename type="first">L</forename><surname>Saulnier</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2310.06825</idno>
		<title level="m">Mistral 7b</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
	<note type="report_type">arXiv preprint</note>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
