<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-title-group>
        <journal-title>CEUR Workshop Proceedings</journal-title>
      </journal-title-group>
    </journal-meta>
    <article-meta>
      <title-group>
        <article-title>Diagnosis in Italian Through a Retrieval-Augmented Medical Chatbot</article-title>
      </title-group>
      <contrib-group>
        <!-- Extraction artifact: "Naples" and "Italy" were captured here as author names; they are the tail of the aff4 postal address, not contributors. -->
        <contrib contrib-type="author">
          <string-name>Mariano Barone</string-name>
          <email>mariano.barone@unina.it</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Gian Marco Orlando</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Marco Perillo</string-name>
          <email>marc.perillo@studenti.unina.it</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Giuseppe Riccio</string-name>
          <email>giuseppe.riccio3@unina.it</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Antonio Romano</string-name>
          <email>antonio.romano5@unina.it</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Diego Russo</string-name>
          <email>diego.russo@unibg.it</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff3">3</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Ferdinando Tammaro</string-name>
          <email>fer.tammaro@studenti.unina.it</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Valerio La Gatta</string-name>
          <email>valerio.lagatta@northwestern.edu</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Marco Postiglione</string-name>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <string-name>Vincenzo Moscato</string-name>
          <email>vincenzo.moscato@unina.it</email>
          <xref ref-type="aff" rid="aff0">0</xref>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <aff id="aff0">
          <label>0</label>
          <institution>24044</institution>
          ,
          <country country="IT">Italy</country>
        </aff>
        <aff id="aff1">
          <label>1</label>
          <institution>Evanston</institution>
          ,
          <addr-line>IL 60208</addr-line>
          ,
          <country country="US">United States</country>
        </aff>
        <aff id="aff2">
          <label>2</label>
          <institution>Northwestern University, Department of Computer Science, McCormick School of Engineering and Applied Science</institution>
          ,
          <addr-line>2233 Tech Dr</addr-line>
        </aff>
        <aff id="aff3">
          <label>3</label>
          <institution>University of Bergamo, Department of Management, Information and Production Engineering</institution>
          ,
          <addr-line>Via Pasubio 7b, Dalmine, BG</addr-line>
        </aff>
        <aff id="aff4">
          <label>4</label>
          <institution>University of Naples Federico II, Department of Electrical Engineering and Information Technology (DIETI)</institution>
          ,
          <addr-line>Via Claudio, 21 - Naples</addr-line>
          ,
          <country country="IT">Italy</country>
        </aff>
      </contrib-group>
      <pub-date>
        <year>2025</year>
      </pub-date>
      <volume>000</volume>
      <fpage>9</fpage>
      <lpage>0009</lpage>
      <abstract>
        <!-- Misplaced affiliation footnote that was captured at the start of the abstract: "2Consorzio Interuniversitario Nazionale per l'Informatica (CINI) - ITEM National Lab, Complesso Universitario Monte S.Angelo," -->
        <p>The integration of Big Data and Artificial Intelligence in healthcare offers significant potential to address the growing need for supporting clinical medical consultation systems. However, many existing solutions struggle to effectively utilize unstructured medical data and provide contextually relevant responses to user queries. This paper addresses this gap by presenting the architecture of an AI-driven medical chatbot based on Retrieval-Augmented Generation. The system leverages data from Italian medical forums and encyclopedias to offer preliminary diagnoses and treatment suggestions. Our approach integrates a retrieval mechanism with a large language model, enhanced by query expansion techniques for improving retrieval accuracy and reranking methods to prioritize the most relevant information. The effectiveness of the system is demonstrated through both qualitative and quantitative evaluations, showcasing improvements in user experience and response precision. We publicly release our code on GitHub: https://github.com/PRAISELab-PicusLab/RAGMedicalChatbot.</p>
      </abstract>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>1. Introduction</title>
      <p>The advent of artificial intelligence (AI) and Big Data has profoundly transformed the healthcare
landscape, offering new ways to process and analyze large amounts of unstructured data from medical
forums, research papers, and medical records. However, current AI systems, in particular large language
models (LLMs), have significant limitations, including the risk of generating hallucinated or inaccurate
information, which undermines their reliability, or the problems of interoperability and transparency
that are crucial in critical sectors such as healthcare [1].</p>
      <p>This paper introduces a medical chatbot system that leverages retrieval-augmented generation (RAG)
techniques to address these shortcomings. By integrating natural language processing (NLP) with
RAG, the chatbot generates personalized responses based on user-reported symptoms while retrieving
relevant, verified medical data from external sources. This dual approach not only enhances response
accuracy but also grounds the system’s outputs in factual content, significantly reducing the risk of
misinformation [2]. In particular, the system incorporates trusted Italian medical knowledge bases such
as MedicItalia<sup>1</sup> and the Humanitas<sup>2</sup> encyclopedia. Despite the promising results of RAG-based solutions
in various domains [3], their use in the Italian medical context remains underexplored. Moreover, most
existing systems fail to integrate domain-specific retrieval with language generation while maintaining
traceability and source citation, especially in non-English clinical data. By addressing these gaps,
the proposed chatbot not only delivers preliminary diagnostic suggestions but also provides explicit
references to authoritative medical sources, thereby enhancing both transparency and user confidence.</p>
      <!-- Page-footer text that interrupted the paragraph above in the extracted source: "∗Corresponding author." / "CEUR Workshop" / "ISSN1613-0073" -->
      <p>The system offers practical benefits for both patients and healthcare professionals. Patients gain
immediate access to reliable, tailored medical insights, while doctors can use the chatbot as a preliminary
assessment tool to streamline the decision-making process [4]. By reducing diagnostic errors and improving
healthcare efficiency, this approach demonstrates the potential of AI to personalize healthcare delivery
and support informed medical decisions.</p>
    </sec>
    <sec id="sec-2">
      <title>2. Related Work</title>
      <p>Large Language Models (LLMs) have shown significant versatility across various domains, including
legal document retrieval [5], knowledge graph construction from geopolitical corpora [6], and
agent-based simulations with generative reasoning capabilities [7]. Recent advancements have extended
LLM-based generative agents to applications in social simulation and decision support, such as modeling the
Friendship Paradox in online social networks [8], detecting insider threats through agent coordination
[9], and supporting fact-checking via diverse and structured agent collectives [10].</p>
      <p>In the biomedical domain, LLM integration has yielded systems like PIE-Med [11], which combines
generative reasoning with graph inference to deliver explainable and interpretable medical
recommendations. Other approaches have leveraged medical entity recognition and generative summarization to
improve the accessibility and structure of clinical records [12]. Complementary to LLM-based solutions,
diversity-aware recommender mechanisms have been proposed to mitigate over-specialization and
promote exploratory recommendations in health-related contexts [13].</p>
      <p>Retrieval-Augmented Generation (RAG) has emerged as a promising paradigm for improving factual
grounding in healthcare NLP systems by integrating external retrieval with generative models. However,
challenges related to domain specificity, hallucination, and contextual accuracy remain prominent.
General limitations of RAG—including grounding instability—are analyzed in [14], while the hallucination
risks of healthcare-focused chatbots are specifically examined in [15]. Addressing these issues, the
present study focuses on Italian clinical discourse, incorporating domain-adapted retrieval and semantic
query expansion to improve factual accuracy and linguistic appropriateness.</p>
      <p>Several works have explored architectural strategies to overcome RAG limitations. Clinical data
extraction from electronic health records (EHRs) has been attempted using RAG pipelines, though
limited domain integration remains a bottleneck [16]. Reranking-based refinement methods have been
introduced to enhance answer precision [17], though performance in open-ended clinical settings
continues to face constraints. Contextual retrieval mechanisms have also been explored to narrow the
relevance gap, yet scalability remains a concern [18].</p>
      <p>Multilingual adaptability constitutes a critical research axis in medical NLP. Results from [19]
demonstrate that language-specific fine-tuning and data augmentation significantly enhance disorder
identification performance in non-English clinical corpora. These findings support the relevance of localization
strategies, as adopted in this work.</p>
      <p>In summary, although prior research has advanced the integration of LLMs and RAG in clinical
applications, key challenges persist around scalability, reliability, and language adaptation. This study
addresses these limitations by combining semantic query expansion, source-linked summarization, and
reranking to support trustworthy and context-aware response generation in Italian-language clinical
scenarios.</p>
      <sec id="sec-2-1">
        <title>1 https://www.medicitalia.it/ 2 https://www.humanitas.it/</title>
        <p>Query
mi fa male la testa</p>
        <p>Generate
RERANKED DOCUMENTS</p>
        <p>D1 D2 D3 ... Dn</p>
        <p>LLM
Reranking</p>
        <p>BM25</p>
        <p>...</p>
        <p>SUMMARIZED
RERANKED DOCUMENTS Summarization</p>
        <p>D1 D2 D3 ... Dn</p>
        <p>LLM
mi fa male la testa
ho mal di testa OR dolore alla testa OR cefalea OR emicrania
DOCUMENTS</p>
        <p>...</p>
        <p>LLM</p>
        <p>Retrieval
Embedder
EMBEDDING</p>
        <p>Vector</p>
        <p>Database
Answer</p>
        <p>Response
Source 1
Source 2</p>
        <p>...</p>
        <p>Source N</p>
        <p>NoSQL</p>
        <p>DB</p>
        <p>Data Collection and Preprocessing</p>
        <p>Medical Italian Web Forums</p>
        <p>Medical Italian Encyclopaedia</p>
      </sec>
    </sec>
    <sec id="sec-3">
      <title>3. Methodology</title>
      <p>
        The medical chatbot developed in this project utilizes RAG and consists of several key components:
starting from data collection from Italian medical forums and an Italian medical encyclopedia, to the generation of
responses based on contextually relevant information retrieved from a knowledge base. As shown
in Figure 1, the system architecture consists of five main phases: (1) query expansion, (2) document
retrieval, (3) reranking, (4) summarization, and (5) response generation. The figure shows how the
user query is first expanded with synonyms to improve recall. Relevant documents are retrieved from
a vector database using cosine similarity and reranked via the BM25 algorithm. These documents are
then summarized and used to construct the final diagnostic prompt, ensuring contextual relevance and
citation transparency.
      </p>
      <sec id="sec-3-1">
        <title>3.1. Data Collection and Preprocessing</title>
        <p>The system collects data from Italian medical forums such as MedicItalia, where users post
health-related questions and licensed doctors provide responses, as well as from the Italian certified medical
encyclopedia Humanitas. This unstructured data was collected via scraping for research purposes only.
To ensure transparency and traceability, each extracted Question/Answer pair includes its original
source URL. The collected data is systematically categorized and transformed into a chatbot-compatible
format using an ETL (Extract, Transform, Load) pipeline, designed for scalable management of large
text corpora typical of Big Data environments. Subsequently, forum interactions are structured into
a Question/Answer format. Long questions are segmented into smaller units to enhance processing
efficiency. This organization enables the creation of embeddings, which are essential for the retrieval of
relevant content and the generation of accurate responses. Finally, document embeddings are generated
and stored in a vector database to optimize real-time retrieval during response generation.</p>
      </sec>
      <sec id="sec-3-2">
        <title>3.2. Retrieval-Augmented Generation (RAG)</title>
        <p>The RAG technique integrates document retrieval with language generation. User queries are first
transformed into embeddings. To optimize the retrieval process, we evaluated several embedding
strategies, such as embedding the concatenated string of both the question and the answer (the entire
forum post) versus embedding only the user’s question.</p>
        <p>We ultimately chose to embed only the question to minimize context loss, which can occur when
using concatenated strings that may be too long, include technical jargon from doctors’ responses,
or contain irrelevant information in the answers. The retrieval phase leverages cosine similarity to
compare the user query embedding with pre-stored document embeddings in the vector database,
returning the top five relevant documents for optimal system performance.</p>
      </sec>
      <sec id="sec-3-3">
        <title>3.3. Query Expansion and Reranking</title>
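        <p>
          In order to enhance the retrieval process, the user’s query undergoes an expansion process using a
generative language model that introduces synonyms and contextually related terms. During this Query
Expansion step, the model analyzes the query to identify key concepts and semantic relationships,
generating alternative phrases that improve the chances of capturing relevant documents. For example,
if the user queries “symptoms of diabetes”, the model may expand it to include terms like “signs of
diabetes”, “glucose levels”, or “insulin resistance”. This step significantly increases the likelihood of
retrieving accurate medical responses. Once the relevant documents are retrieved, a reranking step is
applied to order the top 15 documents based on the BM25 scoring algorithm. BM25 computes a relevance
score (Equation 1) for each document by considering factors such as term frequency (TF), inverse document
frequency (IDF), and document length normalization:
          <disp-formula id="eq1">
            <label>(1)</label>
            <tex-math>\mathrm{score}(D, Q) = \sum_{i=1}^{n} \mathrm{IDF}(q_i) \cdot \frac{f(q_i, D) \cdot (k_1 + 1)}{f(q_i, D) + k_1 \cdot \left(1 - b + b \cdot \frac{|D|}{\mathrm{avgdl}}\right)}</tex-math>
          </disp-formula>
        </p>
        <p>
          Where query <inline-formula><tex-math>Q</tex-math></inline-formula>: contains the terms <inline-formula><tex-math>q_1, q_2, \ldots, q_n</tex-math></inline-formula>; document <inline-formula><tex-math>D</tex-math></inline-formula>: the document to be evaluated; frequency:
<inline-formula><tex-math>f(q_i, D)</tex-math></inline-formula> represents the frequency of the term <inline-formula><tex-math>q_i</tex-math></inline-formula> within document <inline-formula><tex-math>D</tex-math></inline-formula>; constant parameters: <inline-formula><tex-math>k_1</tex-math></inline-formula> and <inline-formula><tex-math>b</tex-math></inline-formula> are
parameters that control the model’s behavior; average document length: avgdl is the average length of
documents in the collection. The Inverse Document Frequency (IDF) is calculated as:
          <!-- NOTE(review): reconstructed as extracted, with "+ 1" outside the logarithm; the common Okapi BM25 definition places it inside. Confirm against the original PDF. -->
          <disp-formula id="eq2">
            <label>(2)</label>
            <tex-math>\mathrm{IDF}(q_i) = \ln\left(\frac{N - n(q_i) + 0.5}{n(q_i) + 0.5}\right) + 1</tex-math>
          </disp-formula>
          where <inline-formula><tex-math>N</tex-math></inline-formula> is the total number of documents in the collection; <inline-formula><tex-math>n(q_i)</tex-math></inline-formula> is the number of documents containing
the term <inline-formula><tex-math>q_i</tex-math></inline-formula>.
        </p>
        <p>This allows the algorithm to evaluate how well each document matches the expanded query. The
system then selects the top five most relevant documents, ensuring that the highest-quality responses
are prioritized for the user.</p>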
      </sec>
      <sec id="sec-3-4">
        <title>3.4. Summarization and Response Generation</title>
        <p>To further improve the quality of responses, the retrieved medical documents are summarized using a
large language model that extracts the most critical parts of the doctors’ responses. This summarization
reduces the length of the response while correcting syntactic and grammatical issues often present in
forum posts, as well as eliminating irrelevant or redundant content. Despite the computational overhead,
this step plays a pivotal role in improving the relevance and fluency of the generated responses. It is
crucial to note that, during the response generation, the sources utilized are clearly indicated. This
allows users to immediately access the original documents from which the information is derived,
ensuring transparency and reliability in the responses provided by the chatbot.</p>
        <p>Following summarization, the documents are reformatted into a standardized schema that separates
the original user question and the corresponding medical response, as illustrated in Figure 2.</p>
        <p>Document Structure
DOCUMENT:
• Question: [User’s question]
• Answer: [Doctor’s response]
• Source: [URL]
Final Prompt
DOCUMENT:
Question:
Answer:
. . .</p>
        <p>DOCUMENT:
Question:</p>
        <p>Answer:</p>
        <p>As shown in Figure 3, the final prompt, to be provided to the language model, is constructed by
concatenating the processed user query with the standardized documents, which contain relevant
answers given by physicians in similar cases.</p>
        <p>Query: [User’s query]
My symptoms are: [User’s symptoms]
Formulate my diagnosis by consulting the following answers given by doctors to other users if they
contain information relevant to me:</p>
        <p>This structured approach, which combines summarization and the sourcing of medical information,
not only enhances the chatbot’s response accuracy but also fosters trust with users by ensuring
transparency.</p>
      </sec>
    </sec>
    <sec id="sec-4">
      <title>4. Experiments</title>
      <sec id="sec-4-1">
        <title>4.1. Experimental setup</title>
        <p>All experiments were conducted on a local machine with an NVIDIA RTX 3070 GPU (8 GB VRAM). Due
to hardware limitations, we selected generative “instruct” models with fewer than 8 billion parameters
in their quantized versions, as well as embedding models with fewer than 600 million parameters.</p>
        <p>For the embedding task, we used the multilingual-e5-large model<sup>3</sup>, a multilingual model with 560
million parameters that supports 94 languages, including Italian. This model has been found to be
particularly effective in capturing context during information retrieval from information sources.</p>
        <sec id="sec-4-1-1">
          <title>3 https://huggingface.co/intfloat/multilingual-e5-large</title>
          <p>For query expansion, summarization and text generation, we employed the LLaMA-3-8B-Instruct
model<sup>4</sup> [20] with top_k = 0.9 and temperature = 0.3, which provided coherent and contextually rich
responses in Italian despite its 8k token context window. The system architecture integrated ChromaDB<sup>5</sup>
for efficient embedding storage and retrieval, while the NoSQL database MongoDB<sup>6</sup> (used in Python) is
used to persistently store conversations extracted from relevant forums and encyclopaedias.</p>
        </sec>
      </sec>
      <sec id="sec-4-2">
        <title>4.2. Dataset</title>
        <p>The dataset used as the knowledge base for the RAG phase was collected from Italian medical forums
with a total of 268,019 conversations between physicians and patients. In total there are 65 medical
categories to which discussions on these forums belong, and the most covered are certain areas such as
Psychology, Gastroenterology and Digestive Endoscopy, and Infectious Diseases. In addition, articles
from Italian medical encyclopedias have also been collected, with a total of 2,981 articles, most of which
cover the field of general medicine.</p>
      </sec>
      <sec id="sec-4-3">
        <title>4.3. Results</title>
        <p>4.3.1. Quantitative Evaluation</p>
        <p>The chatbot’s performance was quantitatively evaluated using a diverse test suite consisting of realistic
patient queries modeled after typical health-related interactions. Three generation strategies were
analyzed:
• <italic>S</italic><sub>1</sub>: No_RAG — baseline with no external document retrieval or augmentation;
• <italic>S</italic><sub>2</sub>: R+Q+RR — Retrieval-Augmented Generation enhanced with Query Expansion and BM25-based Reranking;
• <italic>S</italic><sub>3</sub>: R+Q+RR+S — the previous strategy with an additional Summarization step.</p>
        <p>The test set was constructed manually by taking 50 samples at random from the original dataset. The
informativeness of generated responses was first evaluated using the TF-IDF metric, which quantifies
term frequency normalized by document relevance. As reported in Table 1, strategy <italic>S</italic><sub>3</sub> achieved the
highest score (10.7269), indicating the greatest lexical density and coverage of medically relevant content.
Strategy <italic>S</italic><sub>2</sub> followed closely (10.4144), while the baseline <italic>S</italic><sub>1</sub> obtained the lowest score (9.2093).</p>
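        <p>
          To better characterize the differences in informativeness between strategies, we introduce a novel
metric—the Informative Difference Matrix (IDM)—presented in Table 2. This metric adjusts the raw
difference in TF-IDF scores by incorporating textual similarity via ROUGE-L [21], defined as:
          <disp-formula id="eq3">
            <label>(3)</label>
            <tex-math>\mathrm{IDM}_{i,j} = \left(1 - \text{ROUGE-L}_{i,j}\right) \cdot \left(\mathrm{score}_{\mathrm{tfidf},i} - \mathrm{score}_{\mathrm{tfidf},j}\right)</tex-math>
          </disp-formula>
        </p>
        <p>Here, <inline-formula><tex-math>\mathrm{IDM}_{i,j}</tex-math></inline-formula> quantifies the net gain in informativeness from strategy <inline-formula><tex-math>S_i</tex-math></inline-formula> over <inline-formula><tex-math>S_j</tex-math></inline-formula>, penalized by overlapping
structure. Positive values suggest that <inline-formula><tex-math>S_i</tex-math></inline-formula> offers more distinctive and rich content compared to <inline-formula><tex-math>S_j</tex-math></inline-formula>. Manual</p>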
        <sec id="sec-4-3-1">
          <title>4https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct 5https://www.trychroma.com/ 6https://www.mongodb.com/</title>
          <p>inspection of high-scoring <inline-formula><tex-math>\mathrm{IDM}_{i,j}</tex-math></inline-formula> pairs confirmed that higher values correlated with more context-specific
and clinically informative answers.</p>
          <p>To complement these lexical evaluations, we employed BERTScore [22], a semantic similarity metric
based on transformer embeddings. Using the multilingual model
paraphrase-multilingual-MiniLM-L12-v2 [23], we computed average F1 scores across all outputs. Results indicate that strategies <italic>S</italic><sub>2</sub> and
<italic>S</italic><sub>3</sub> offer significantly better alignment with expected clinical answers, with <italic>S</italic><sub>3</sub> showing the highest
semantic relevance (0.8914).</p>
          <p>These findings confirm that the proposed enhancements—query expansion, reranking, and
summarization—substantially improve both the surface-level informativeness and deeper semantic coherence
of the generated content.</p>
          <p>Key performance metrics are summarized as follows:
• Response Time: Average of 30 seconds per query, ranging from 25s (R+Q+RR) to 37s
(R+Q+RR+S).
• Retrieval Accuracy: 86% of test queries successfully returned at least one medically relevant
document.
• Structural Similarity: Measured using ROUGE-L [21] and BLEU [24] against reference phrasing.
• Semantic Preservation: Assessed with BERTScore [22], highlighting improved contextual
alignment.
4.3.2. Qualitative Evaluation
Subject matter experts evaluated the chatbot’s responses, finding that RAG-generated replies provided
more precise diagnostic suggestions than those without RAG. The inclusion of medical references
boosted user trust. Queries were tested in three modes: Base LLM, LLM with RAG, and LLM with full
processing. RAG improved relevance through query expansion and reranking. Responses without RAG
were often vague, while RAG-enabled responses offered preliminary diagnoses and treatments. Response
times ranged from 25 to 45 seconds, with query expansion adding 2 to 4 seconds and summarization
involving up to 7 seconds per step. To balance speed and accuracy, the chatbot offers two modes: Fast
(no summarization) and Precise (full processing).
4.3.3. System Demonstration</p>
        </sec>
      </sec>
    </sec>
    <sec id="sec-5">
      <title>5. Ethical Considerations</title>
      <p>While AI-driven medical chatbots offer numerous advantages, they also raise significant ethical concerns
that must be addressed. One of the most critical issues is the risk of misdiagnosis or misinformation,
particularly when users rely solely on the system’s recommendations without consulting a healthcare
professional. Data privacy is another key concern, given the sensitive nature of the medical information
being processed. To mitigate these risks, the system must clearly inform users that it provides only
preliminary guidance and is not a substitute for professional diagnosis. By also attaching sources
to its responses, the system enhances transparency, allowing users to verify the credibility of the
information and fostering greater trust through the ability to explore reliable medical sources. To ensure
accountability, each response includes links to original medical forum threads and encyclopedia articles.
Additionally, the chatbot explicitly states it is not a replacement for professional medical consultation
and does not store user data.</p>
    </sec>
    <sec id="sec-6">
      <title>6. Conclusion and Future direction</title>
      <p>This paper presented the implementation of an AI-based medical chatbot utilizing RAG techniques
and Big Data technologies. The system’s ability to provide relevant medical responses, supported by
reliable sources, makes it a valuable tool for both preliminary consultations and patient education,
helping guide users towards appropriate medical professionals. Future work will focus on improving
the chatbot’s consultation accuracy by integrating larger and more diverse datasets from certified,
high-quality sources, as well as adopting more advanced generative models. Additionally, a promising
direction for further development is the integration of real-time medical data from wearable devices and
electronic health records, with the aim of making the chatbot not only a useful resource for preliminary
consultations but also a supportive diagnostic tool for healthcare professionals.</p>
    </sec>
    <sec id="sec-7">
      <title>Acknowledgments</title>
      <p>
        This work was conducted with the financial support of (1) the PNRR MUR project PE0000013-FAIR and
(2) the Italian Ministry of Economic Development, via the ICARUS (Intelligent Contract Automation for
Rethinking User Services) project (CUP: B69J23000270005).
      </p>
    </sec>
    <sec id="sec-8">
      <title>Declaration on Generative AI</title>
      <p>During the preparation of this work, the author(s) used ChatGPT and DeepL in order to: Grammar and
spelling check. After using these tool(s)/service(s), the author(s) reviewed and edited the content as
needed and take(s) full responsibility for the publication’s content.
[11] A. Romano, G. Riccio, M. Postiglione, V. Moscato, Pie-med: Predicting, interpreting and explaining
medical recommendations, in: C. Hauff, C. Macdonald, D. Jannach, G. Kazai, F. M. Nardini,
F. Pinelli, F. Silvestri, N. Tonellotto (Eds.), Advances in Information Retrieval - 47th European
Conference on Information Retrieval, ECIR 2025, Lucca, Italy, April 6-10, 2025, Proceedings,
Part V, volume 15576 of Lecture Notes in Computer Science, Springer, 2025, pp. 6–12.
URL: https://doi.org/10.1007/978-3-031-88720-8_2. doi:10.1007/978-3-031-88720-8_2.
[12] G. Riccio, A. Romano, A. Korsun, M. Cirillo, M. Postiglione, V. L. Gatta, A. Ferraro, A. Galli,
V. Moscato, Healthcare data summarization via medical entity recognition and generative AI,
in: N. Bena, B. D. Martino, A. Maratea, A. Sperduti, E. D. Nardo, A. Ciaramella, R. Montella,
C. A. Ardagna (Eds.), Proceedings of the 2nd Italian Conference on Big Data and Data Science
(ITADATA 2023), Naples, Italy, September 11-13, 2023, volume 3606 of CEUR Workshop Proceedings,
CEUR-WS.org, 2023, pp. –. URL: https://ceur-ws.org/Vol-3606/paper47.pdf.
[13] A. Ferraro, A. Galli, V. La Gatta, M. Postiglione, D. Russo, G. M. Orlando, G. Riccio, A. Romano,
V. Moscato, From explanation to exploration: Promoting diversity in recommendation systems, in:
International Workshop on Recommender Systems for Sustainability and Social Good, Springer,
2024, pp. 135–150.
[14] S. Zhao, Y. Yang, Z. Wang, Z. He, L. Qiu, L. Qiu, Retrieval augmented generation (RAG) and
beyond: A comprehensive survey on how to make your llms use external data more wisely, CoRR
abs/2409.14924 (2024). URL: https://doi.org/10.48550/arXiv.2409.14924.
doi:10.48550/ARXIV.2409.14924. arXiv:2409.14924.
[15] S. Ranasinghe, D. D. Silva, N. Mills, D. Alahakoon, M. Manic, Y. Lim, W. Ranasinghe, Addressing
the productivity paradox in healthcare with retrieval augmented generative AI chatbots, in: IEEE
International Conference on Industrial Technology, ICIT 2024, Bristol, UK, March 25-27, 2024, IEEE,
2024, pp. 1–6. URL: https://doi.org/10.1109/ICIT58233.2024.10540818.
doi:10.1109/ICIT58233.2024.10540818.
[16] M. Alkhalaf, P. Yu, M. Yin, C. Deng, Applying generative ai with retrieval augmented generation
to summarize and extract key clinical information from electronic health records, Journal of
Biomedical Informatics 156 (2024) 104662.
URL: https://www.sciencedirect.com/science/article/pii/S1532046424000807. doi:10.1016/j.jbi.2024.104662.
[17] S. Murali, S. S., S. R., Remag-kr: Retrieval and medically assisted generation with knowledge
reduction for medical question answering, in: X. Fu, E. Fleisig (Eds.), Proceedings of the 62nd
Annual Meeting of the Association for Computational Linguistics, ACL 2024 - Student Research
Workshop, Bangkok, Thailand, August 11-16, 2024, Association for Computational Linguistics,
2024, pp. 140–145. URL: https://aclanthology.org/2024.acl-srw.13.
[18] J. Bayarri-Planas, A. K. Gururajan, U. C. D. Garcia-Gasulla, Boosting healthcare llms through
retrieved context, CoRR abs/2409.15127 (2024). URL: https://doi.org/10.48550/arXiv.2409.15127.
doi:10.48550/ARXIV.2409.15127. arXiv:2409.15127.
[19] A. Romano, G. Riccio, M. Postiglione, V. Moscato, Identifying cardiological disorders in spanish
via data augmentation and fine-tuned language models, in: G. Faggioli, N. Ferro, P. Galuscáková,
A. G. S. de Herrera (Eds.), Working Notes of the Conference and Labs of the Evaluation Forum
(CLEF 2024), Grenoble, France, 9-12 September, 2024, volume 3740 of CEUR Workshop Proceedings,
CEUR-WS.org, 2024, pp. 207–222. URL: https://ceur-ws.org/Vol-3740/paper-19.pdf.
[20] AI@Meta, Llama 3 model card, Meta (2024).
URL: https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md.
[21] C.-Y. Lin, Rouge: A package for automatic evaluation of summaries, in: Text summarization
branches out, 2004, pp. 74–81.
[22] T. Zhang, V. Kishore, F. Wu, K. Q. Weinberger, Y. Artzi, Bertscore: Evaluating text generation with
bert, arXiv preprint arXiv:1904.09675 (2019).
[23] N. Reimers, I. Gurevych, Making monolingual sentence embeddings multilingual using knowledge
distillation, in: Proceedings of the 2020 Conference on Empirical Methods in Natural Language
Processing (EMNLP), 2020, pp. 4512–4525.
[24] K. Papineni, S. Roukos, T. Ward, W.-J. Zhu, Bleu: a method for automatic evaluation of machine
translation, in: Proceedings of the 40th annual meeting of the Association for Computational
Linguistics, 2002, pp. 311–318.</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          [1]
          <string-name>
            <given-names>M. A.</given-names>
            <surname>Ahmad</surname>
          </string-name>
          ,
          <string-name>
            <given-names>I.</given-names>
            <surname>Yaramis</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T. D.</given-names>
            <surname>Roy</surname>
          </string-name>
          ,
          <article-title>Creating trustworthy llms: Dealing with hallucinations in healthcare AI</article-title>
          ,
          <source>CoRR</source>
          abs/2311.01463 (
          <year>2023</year>
          ). URL: https://doi.org/10.48550/arXiv.2311.01463. doi:10.48550/ARXIV.2311.01463. arXiv:2311.01463.
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          [2]
          <string-name>
            <given-names>A.</given-names>
            <surname>Bora</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Cuayáhuitl</surname>
          </string-name>
          ,
          <article-title>Systematic analysis of retrieval-augmented generation-based llms for medical chatbot applications</article-title>
          ,
          <source>Machine Learning and Knowledge Extraction</source>
          <volume>6</volume>
          (
          <year>2024</year>
          )
          <fpage>2355</fpage>
          -
          <lpage>2374</lpage>
          . URL: https://www.mdpi.com/2504-4990/6/4/116. doi:10.3390/make6040116.
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          [3]
          <string-name>
            <given-names>P.</given-names>
            <surname>Lewis</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Perez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Piktus</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Petroni</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Karpukhin</surname>
          </string-name>
          ,
          <string-name>
            <given-names>N.</given-names>
            <surname>Goyal</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Küttler</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Lewis</surname>
          </string-name>
          ,
          <string-name>
            <given-names>W.</given-names>
            <surname>Yih</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Rocktäschel</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Riedel</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Kiela</surname>
          </string-name>
          ,
          <article-title>Retrieval-augmented generation for knowledge-intensive NLP tasks</article-title>
          , in:
          <string-name>
            <given-names>H.</given-names>
            <surname>Larochelle</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Ranzato</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Hadsell</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Balcan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>H.</given-names>
            <surname>Lin</surname>
          </string-name>
          (Eds.),
          <source>Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020</source>
          , December 6-12, 2020, virtual,
          <year>2020</year>
          , pp. -. URL: https://proceedings.neurips.cc/paper/2020/hash/6b493230205f780e1bc26945df7481e5-Abstract.html.
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          [4]
          <string-name>
            <given-names>S. G.</given-names>
            <surname>Haez</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Segala</surname>
          </string-name>
          ,
          <string-name>
            <given-names>P.</given-names>
            <surname>Bellan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Magnolini</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Sanna</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Consolandi</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Dragoni</surname>
          </string-name>
          ,
          <article-title>A retrieval-augmented generation strategy to enhance medical chatbot reliability</article-title>
          , in:
          <string-name>
            <given-names>J.</given-names>
            <surname>Finkelstein</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Moskovitch</surname>
          </string-name>
          ,
          <string-name>
            <given-names>E.</given-names>
            <surname>Parimbelli</surname>
          </string-name>
          (Eds.),
          <source>Artificial Intelligence in Medicine - 22nd International Conference, AIME 2024</source>
          , Salt Lake City, UT, USA, July 9-12, 2024, Proceedings, Part I, volume
          <volume>14844</volume>
          of Lecture Notes in Computer Science, Springer,
          <year>2024</year>
          , pp.
          <fpage>213</fpage>
          -
          <lpage>223</lpage>
          . URL: https://doi.org/10.1007/978-3-031-66538-7_22. doi:10.1007/978-3-031-66538-7_22.
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          [5]
          <string-name>
            <given-names>R.</given-names>
            <surname>Russo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Russo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Orlando</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Romano</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Riccio</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V. L.</given-names>
            <surname>Gatta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Postiglione</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Moscato</surname>
          </string-name>
          ,
          <article-title>Europeanlawadvisor: an open source search engine for european laws</article-title>
          , in:
          <string-name>
            <given-names>W.</given-names>
            <surname>Ding</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Lu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Wang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Di</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K.</given-names>
            <surname>Wu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Huan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Nambiar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Li</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Ilievski</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Baeza-Yates</surname>
          </string-name>
          ,
          <string-name>
            <given-names>X.</given-names>
            <surname>Hu</surname>
          </string-name>
          (Eds.),
          <source>IEEE International Conference on Big Data, BigData 2024</source>
          , Washington, DC, USA, December 15-18, 2024, IEEE,
          <year>2024</year>
          , pp.
          <fpage>4751</fpage>
          -
          <lpage>4756</lpage>
          . URL: https://doi.org/10.1109/BigData62323.2024.10826025. doi:10.1109/BIGDATA62323.2024.10826025.
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          [6]
          <string-name>
            <given-names>D.</given-names>
            <surname>Russo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Orlando</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Romano</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Riccio</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V. L.</given-names>
            <surname>Gatta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Postiglione</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Moscato</surname>
          </string-name>
          ,
          <article-title>Scaling llm-based knowledge graph generation: A case study of italian geopolitical news</article-title>
          , in:
          <string-name>
            <given-names>W.</given-names>
            <surname>Ding</surname>
          </string-name>
          ,
          <string-name>
            <given-names>C.</given-names>
            <surname>Lu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Wang</surname>
          </string-name>
          ,
          <string-name>
            <given-names>L.</given-names>
            <surname>Di</surname>
          </string-name>
          ,
          <string-name>
            <given-names>K.</given-names>
            <surname>Wu</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Huan</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Nambiar</surname>
          </string-name>
          ,
          <string-name>
            <given-names>J.</given-names>
            <surname>Li</surname>
          </string-name>
          ,
          <string-name>
            <given-names>F.</given-names>
            <surname>Ilievski</surname>
          </string-name>
          ,
          <string-name>
            <given-names>R.</given-names>
            <surname>Baeza-Yates</surname>
          </string-name>
          ,
          <string-name>
            <given-names>X.</given-names>
            <surname>Hu</surname>
          </string-name>
          (Eds.),
          <source>IEEE International Conference on Big Data, BigData 2024</source>
          , Washington, DC, USA, December 15-18, 2024, IEEE,
          <year>2024</year>
          , pp.
          <fpage>3494</fpage>
          -
          <lpage>3497</lpage>
          . URL: https://doi.org/10.1109/BigData62323.2024.10825937. doi:10.1109/BIGDATA62323.2024.10825937.
        </mixed-citation>
      </ref>
      <ref id="ref7">
        <mixed-citation>
          [7]
          <string-name>
            <given-names>A.</given-names>
            <surname>Ferraro</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Galli</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V. L.</given-names>
            <surname>Gatta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>M.</given-names>
            <surname>Postiglione</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Orlando</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Russo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G.</given-names>
            <surname>Riccio</surname>
          </string-name>
          ,
          <string-name>
            <given-names>A.</given-names>
            <surname>Romano</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Moscato</surname>
          </string-name>
          ,
          <article-title>Agent-based modelling meets generative AI in social network simulations</article-title>
          , in:
          <string-name>
            <given-names>L. M.</given-names>
            <surname>Aiello</surname>
          </string-name>
          ,
          <string-name>
            <given-names>T.</given-names>
            <surname>Chakraborty</surname>
          </string-name>
          ,
          <string-name>
            <given-names>S.</given-names>
            <surname>Gaito</surname>
          </string-name>
          (Eds.),
          <source>Social Networks Analysis and Mining - 16th International Conference, ASONAM 2024</source>
          , Rende, Italy, September 2-5, 2024, Proceedings, Part I, volume
          <volume>15211</volume>
          of Lecture Notes in Computer Science, Springer,
          <year>2024</year>
          , pp.
          <fpage>155</fpage>
          -
          <lpage>170</lpage>
          . URL: https://doi.org/10.1007/978-3-031-78541-2_10. doi:10.1007/978-3-031-78541-2_10.
        </mixed-citation>
      </ref>
      <ref id="ref8">
        <mixed-citation>
          [8]
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Orlando</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>La Gatta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Russo</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Moscato</surname>
          </string-name>
          ,
          <article-title>Can generative agent-based modeling replicate the friendship paradox in social media simulations?</article-title>
          ,
          in:
          <source>Proceedings of the 17th ACM Web Science Conference 2025</source>
          ,
          <year>2025</year>
          , pp.
          <fpage>510</fpage>
          -
          <lpage>515</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref9">
        <mixed-citation>
          [9]
          <string-name>
            <given-names>A.</given-names>
            <surname>Ferraro</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Orlando</surname>
          </string-name>
          ,
          <string-name>
            <given-names>D.</given-names>
            <surname>Russo</surname>
          </string-name>
          ,
          <article-title>Generative agent-based modeling with large language models for insider threat detection</article-title>
          ,
          <source>Engineering Applications of Artificial Intelligence</source>
          <volume>157</volume>
          (
          <year>2025</year>
          )
          <fpage>111343</fpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref10">
        <mixed-citation>
          [10]
          <string-name>
            <given-names>L.</given-names>
            <surname>Costabile</surname>
          </string-name>
          ,
          <string-name>
            <given-names>G. M.</given-names>
            <surname>Orlando</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>La Gatta</surname>
          </string-name>
          ,
          <string-name>
            <given-names>V.</given-names>
            <surname>Moscato</surname>
          </string-name>
          ,
          <article-title>Assessing the potential of generative agents in crowdsourced fact-checking</article-title>
          ,
          <source>arXiv preprint arXiv:2504.19940</source>
          (
          <year>2025</year>
          ).
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>