<?xml version="1.0" encoding="UTF-8"?>
<TEI xml:space="preserve" xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 https://raw.githubusercontent.com/kermitt2/grobid/master/grobid-home/schemas/xsd/Grobid.xsd"
 xmlns:xlink="http://www.w3.org/1999/xlink">
	<teiHeader xml:lang="en">
		<fileDesc>
			<titleStmt>
				<title level="a" type="main">Modest-Pharo: Unit Test Generation for Pharo Based on Traces and Metamodels</title>
			</titleStmt>
			<publicationStmt>
				<publisher/>
				<availability status="unknown"><licence/></availability>
			</publicationStmt>
			<sourceDesc>
				<biblStruct>
					<analytic>
						<author>
							<persName><forename type="first">Gabriel</forename><surname>Darbord</surname></persName>
							<email>gabriel.darbord@inria.fr</email>
							<affiliation key="aff0">
								<orgName type="laboratory">UMR 9189 CRIStAL</orgName>
								<orgName type="institution" key="instit1">Univ. Lille</orgName>
								<orgName type="institution" key="instit2">Inria</orgName>
								<orgName type="institution" key="instit3">CNRS</orgName>
								<orgName type="institution" key="instit4">Centrale Lille</orgName>
								<address>
									<postCode>F-59000</postCode>
									<settlement>Lille</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Fabio</forename><surname>Vandewaeter</surname></persName>
							<email>fabio.vandewaeter.etu@univ-lille.fr</email>
							<affiliation key="aff0">
								<orgName type="laboratory">UMR 9189 CRIStAL</orgName>
								<orgName type="institution" key="instit1">Univ. Lille</orgName>
								<orgName type="institution" key="instit2">Inria</orgName>
								<orgName type="institution" key="instit3">CNRS</orgName>
								<orgName type="institution" key="instit4">Centrale Lille</orgName>
								<address>
									<postCode>F-59000</postCode>
									<settlement>Lille</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Anne</forename><surname>Etien</surname></persName>
							<email>anne.etien@inria.fr</email>
							<affiliation key="aff1">
								<orgName type="laboratory">UMR 9189 CRIStAL</orgName>
								<orgName type="institution" key="instit1">Univ. Lille</orgName>
								<orgName type="institution" key="instit2">CNRS</orgName>
								<orgName type="institution" key="instit3">Inria</orgName>
								<orgName type="institution" key="instit4">Centrale Lille</orgName>
								<address>
									<postCode>F-59000</postCode>
									<settlement>Lille</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Nicolas</forename><surname>Anquetil</surname></persName>
							<email>nicolas.anquetil@inria.fr</email>
							<affiliation key="aff1">
								<orgName type="laboratory">UMR 9189 CRIStAL</orgName>
								<orgName type="institution" key="instit1">Univ. Lille</orgName>
								<orgName type="institution" key="instit2">CNRS</orgName>
								<orgName type="institution" key="instit3">Inria</orgName>
								<orgName type="institution" key="instit4">Centrale Lille</orgName>
								<address>
									<postCode>F-59000</postCode>
									<settlement>Lille</settlement>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Benoit</forename><surname>Verhaeghe</surname></persName>
							<email>benoit.verhaeghe@berger-levrault.com</email>
							<affiliation key="aff2">
								<orgName type="institution">Berger-Levrault</orgName>
								<address>
									<country key="FR">France</country>
								</address>
							</affiliation>
						</author>
						<author>
							<persName><forename type="first">Al</forename><surname>Ceur</surname></persName>
						</author>
						<author>
							<persName><forename type="first">Workshop</forename><surname>Proceedings</surname></persName>
						</author>
						<title level="a" type="main">Modest-Pharo: Unit Test Generation for Pharo Based on Traces and Metamodels</title>
					</analytic>
					<monogr>
						<idno type="ISSN">1613-0073</idno>
					</monogr>
					<idno type="MD5">7FFF6785E23D4AAA9A133E158D61420D</idno>
				</biblStruct>
			</sourceDesc>
		</fileDesc>
		<encodingDesc>
			<appInfo>
				<application version="0.7.2" ident="GROBID" when="2025-04-23T18:10+0000">
					<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>
					<ref target="https://github.com/kermitt2/grobid"/>
				</application>
			</appInfo>
		</encodingDesc>
		<profileDesc>
			<textClass>
				<keywords>
					<term>test generation</term>
					<term>unit tests</term>
					<term>regression testing</term>
					<term>trace-based</term>
					<term>metamodels</term>
					<term>Pharo</term>
				</keywords>
			</textClass>
			<abstract>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Unit testing is essential in software development to ensure code functionality and prevent the introduction of bugs. However, challenges such as time constraints and insufficient resource allocation often impede comprehensive testing efforts, leaving software systems vulnerable to regression. To address this issue, we introduce Modest, a language-agnostic approach to unit test generation that uses metamodels and execution traces. This method ensures non-regression by replaying scenarios captured from real-world executions. We demonstrate the application of Modest to Pharo codebases by generating unit tests for two projects. A total of 188 tests were generated and compared to existing tests based on mutation coverage, and we found that combining existing and generated tests increased coverage.</p></div>
			</abstract>
		</profileDesc>
	</teiHeader>
	<text xml:lang="en">
		<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="1.">Introduction</head><p>Unit testing is an essential part of software development, serving as a critical mechanism for verifying code functionality and mitigating the risk of introducing bugs. Despite its importance, time constraints and inadequate resource allocation often prevent the widespread adoption of unit testing practices. This can result in codebases that lack proper testing, leaving software systems vulnerable to bugs, issues, and regressions <ref type="bibr" target="#b0">[1,</ref><ref type="bibr" target="#b1">2]</ref>.</p><p>While existing approaches <ref type="bibr" target="#b2">[3,</ref><ref type="bibr" target="#b3">4,</ref><ref type="bibr" target="#b4">5]</ref> showed promising results in test generation, they have some limitations, such as being specific to a particular programming language or testing framework. To address this issue, we propose Modest, a language-agnostic approach to test generation. This approach involves the use of metamodels to facilitate the representation and generation of unit tests. The use of metamodels provides a solution that is independent of the programming language and testing framework. It enables automated transformation and code generation. Specifically, we use three metamodels: the unit test metamodel, which represents unit test elements; the code metamodel, which represents the codebase; and the value metamodel, which specifies the values used to test the codebase. Our approach is not intended to replace test-driven development or classic development practices where tests are written during the development phase. Our approach aims to generate unit tests on legacy software systems where tests are partially or completely missing. The generated tests help manage regression and identify new software bugs in existing areas of a system after changes have been made.</p><p>Furthermore, we aim to generate maintainable test code that is easy for humans to understand. 
Human-readable and maintainable tests make it easier for developers to understand how the code works and to make changes to the codebase with confidence <ref type="bibr" target="#b5">[6]</ref>. In addition, human-readable tests can be helpful when onboarding new developers to a project, or when maintaining code written by others.</p><p>Ultimately, maintainable tests can reduce the amount of time spent debugging and fixing issues in the codebase.</p><p>To generate realistic tests, we use application traces consisting of method arguments and return values to leverage values from real business scenarios. Traces refer to the sequential recording of actions or operations in a system during its execution. This information is critical because it provides an accurate representation of how the software behaves at runtime.</p><p>In our previous work <ref type="bibr" target="#b6">[7]</ref>, we introduced two metamodels: the Value metamodel for representing runtime values, and the Unit Test metamodel. Our approach is based on Moose, a platform for software and data analysis <ref type="foot" target="#foot_0">1</ref> . This infrastructure allows us to extract knowledge from software systems and to apply our approach across programming languages.</p><p>In this paper, we present our five-step approach in Section 2. In Section 3 we explain the implementation of some steps in the case of test generation in Pharo. Section 4 presents some results on concrete Pharo applications. We discuss related works in Section 5. In Section 6, conclusions are drawn and perspectives are proposed.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.">Modest: a Unit test Generation Approach</head><p>Modest uses method execution traces to generate unit tests. This approach assumes that the current version of the software system for which tests are being generated is correct, allowing execution traces to be used as an oracle. The process relies on five steps to generate unit tests, as shown in Figure <ref type="figure" target="#fig_0">1</ref>. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>Codebase Traces</head><note type="other">Value</note></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.1.">Prerequisites</head><p>There are two independent requirements that must be met before the test generation process can begin.</p><p>Step 1: Obtain a model of the application. Using the capabilities of the Moose platform, we create a comprehensive model of the application for which tests are to be generated. This model captures the structural aspects of the application, such as its classes and methods, and their relationships.</p><p>Step 2: Produce traces of the application. Data about the execution of the current version of the software system is recorded as a trace. Each trace corresponds to a specific method execution and must contain the following information: method identity, arguments, return value, and the receiver object. The method identity is a way to know exactly which method was executed. This is critical because multiple methods in the system can have the same signature due to polymorphism. This identity consists of the fully qualified class name and the method signature, including parameter types in the case of statically typed languages.</p><p>For a given project, any method that has no side effects and returns a value is a candidate for instrumentation. Side effects include use of the file system, graphical interfaces, network, global states, and randomness. Each execution of an instrumented method can result in a generated test. Thus, for a given executable comment or existing test, multiple tests can be generated that differ in the value of the arguments and the return value.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="2.2.">Test Generation Process</head><p>Once the prerequisites are met, the following steps are performed iteratively for each test generation cycle.</p><p>Step 3: Import and parse trace data. Traces are imported into Modest and reified to conform to a specific format. This ensures that the imported traces are represented consistently, regardless of the original storage format. The serialized data contained in each trace is parsed to extract relevant information. This parsed data is then reified using our Value metamodel, transforming it into a standard format for further processing. The Value metamodel bridges static code elements, such as method parameters, with dynamic runtime values, such as method arguments.</p><p>Step 4: Build a unit test model. The Unit Test metamodel presented in <ref type="bibr" target="#b6">[7]</ref> is agnostic to the language and the testing framework used. It is built around the Arrange Act Assert (AAA) pattern, a widely used approach to structuring unit tests. We use the trace of a particular method execution to determine the test class and method, as well as the arrange, act, and assert phases of a unit test. The executed method determines the test method, while its class determines the test class. The method arguments determine the arrange and act phases of the test, where they are set up, used, and torn down. Finally, the result obtained from the trace determines the assert phase. We use the result as a test oracle, and the actual return value obtained in the act phase is compared to the expected value from the trace.</p><p>Step 5: Export the unit test model into concrete tests. The unit test model is translated into executable test code specific to the target language and the specific testing framework. This translation involves converting the model into Abstract Syntax Tree (AST) nodes. Finally, the AST nodes are used to generate the actual unit tests.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="3.">Adapting the Modest Approach to Pharo</head><p>In this section, we outline our methodology for generating unit tests for Pharo software systems. The Moose platform is used for application modeling, and a Pharo implementation of OpenTelemetry is used to produce execution traces. Consequently, details are given for language-dependent steps, i.e. steps 2, 3, and 5.</p><p>Step 2: Produce traces of the application. We use a Pharo implementation<ref type="foot" target="#foot_1">2</ref> of OpenTelemetry to generate execution traces of the application. OpenTelemetry <ref type="foot" target="#foot_2">3</ref> is an open-source observability framework and standard designed to generate, collect, and export telemetry data such as traces. It provides tools and APIs for instrumenting applications to monitor and analyze their behavior. Our implementation uses MetaLinks <ref type="bibr" target="#b7">[8,</ref><ref type="bibr" target="#b8">9]</ref>, which allows the execution of instrumentation code before and after the method on which it is installed. We use this mechanism to record the method identity, arguments, return value, and the receiver. The instrumentation does not propagate to outgoing calls, only the targeted methods are traced. This preliminary step is only concerned with generating trace data. These traces will be fed into Modest in the following step, which will take place at a later date and possibly in a different Pharo image. Thus, the recorded data must be serialized for storage. In addition, we require that the serialized objects contain enough information to be correctly represented by the value metamodel, such as their runtime type.</p><p>The STON library <ref type="foot" target="#foot_3">4</ref> encodes the runtime type data we need, but it is not able to serialize all types of objects. In addition, STON allows developers to define a custom serialization format for their class. 
While this customization is useful, it makes the object encoding opaque to external tools such as Modest. Consequently, we developed a custom library inspired by Jackson<ref type="foot" target="#foot_4">5</ref> , called PharoJackson<ref type="foot" target="#foot_5">6</ref> , with the goal of being able to serialize any object to JSON in a consistent way. Similar to STON and Jackson, our library includes metadata to express the object type and handles circular references.</p><p>Step 3: Import and parse trace data. When the execution traces are imported into Modest, the data they contain is parsed to extract the relevant information. First, the method identity is used to determine the origin of the trace, corresponding to the method to be tested. For Pharo, this consists of the method selector and the name of the defining class.</p><p>Then, the serialized data containing the method arguments, return value, and receiver is deserialized from JSON to basic data structures: dictionaries, arrays, strings; and primitive data types: numbers, booleans, and null values. Except for dictionaries, all these types represent instances of their corresponding class in Pharo. For example, a JSON array corresponds to an instance of a Pharo Array.</p><p>Listing 1: User and Session objects serialized with PharoJackson 1 { 2 "@type": "User", 3 "@id": 1,</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head>4</head><p>"name": "John Doe", 5 "session": { 6 "@type": "Session", 7 "@id": 2, Dictionaries are a special case because they are used to represent objects. Their key-value pairs correspond to attribute names and values (e.g. in Listing 1, the name attribute on line 4). In addition, metadata is added by PharoJackson: the @type value indicates the class of the object (e.g. line 2 indicates it is an instance of the User class), and the @id value is an identifier for handling circular dependencies (e.g. line 3). If the same object is referenced more than once, it is subsequently represented by a dictionary with a @ref value indicating the identifier of the corresponding object (e.g. line 9).</p><p>Thus, deserializing the trace data returns a graph of basic data structures. The importer of the Value metamodel is designed to interpret this specific format. It traverses the graph and instantiates the corresponding Value entities into a model.</p><p>Step 5: Export the unit test model into concrete tests. The unit test model is translated into executable test code specific to the Pharo language and the SUnit testing framework. Each element of the model is systematically visited.</p><p>Test classes are created using Pharo's built-in class creation API. For clarity and separation from existing tests, newly created test classes are named by appending ModestTest to the name of the tested class, e.g. in Listing 2. As part of the class creation process, each test class is then assigned to an appropriate package. Following Pharo's naming conventions, the test package is named after the package of the tested class, with the suffix -Tests added. 
If the specified test package does not exist, it will be created automatically.</p><p>Listing 2: Definition of the generated test class for the DataFrame class, from the package of the same name.</p><p>TestCase &lt;&lt; #DataFrameModestTest slots: {}; package: 'DataFrame-Tests'</p><p>After visiting a test class within the unit test model, the process moves on to exporting its test methods along with their associated arrange, act, and assert entities. These three entities are linked to value entities, which are visited by a specialized visitor responsible for generating the AST to recreate the values as code, e.g. in Listing 3. Both visitors work together to generate the AST of the test method, which is then materialized and installed in the test class. </p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.">Modest in Action on Pharo Projects</head><p>In this section we evaluate our approach on real Pharo projects. First, relevant projects were selected. Then they were instrumented to generate traces. Finally, we present the generated test cases and the benefits of our approach.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.1.">Selection of Projects</head><p>As explained earlier, our approach is based on execution traces. There are several ways to get them in Pharo, such as manually executing the software to be tested. However, this can be difficult if we are not a user or developer of the software system; it requires expert knowledge. Therefore, alternative ways to generate execution traces are needed. As it happens, there are other ways to run valid execution scenarios: tests and examples, such as executable comments or class-side examples using the &lt;example&gt; pragma. Such examples are very common in kernel packages and graphical projects. However, since our approach uses metalinks to generate the trace, it is not possible to select projects from the kernel that are used by the instrumentation itself, such as the Boolean or Collection packages, as this would break the image. Also, as explained in Section 2, our approach does not currently deal with graphical applications, as it requires that the tested method returns a value. Side effects and randomness are also not handled yet, which limits the choice of projects.</p><p>Two projects were selected: DataFrame<ref type="foot" target="#foot_6">7</ref> and LabelContractor <ref type="foot" target="#foot_7">8</ref> . DataFrame is a tabular data structure for data analysis in Pharo. It organizes and represents data in a tabular format, similar to a spreadsheet or database table. It also provides several algorithms for data manipulation. For our evaluation we only considered the DataFrame class. The LabelContractor project is used to reduce the size of labels for graphical interfaces using different strategies. It currently provides 13 different contraction strategies and two ways to combine them. For our evaluation, we considered the project's main class, a tokenizer class, a helper class, and seven strategies. 
We report information about the selected classes in Table <ref type="table" target="#tab_0">1</ref>.</p><p>In the case of DataFrame, traces result from running existing tests. In the case of LabelContractor, traces result from running existing tests and executable comments. In both cases, our approach generates tests from these traces. Since tests already exist for these projects, it is possible to compare them with our generated tests in terms of mutation coverage. To obtain these measurements, we used the MuTalk<ref type="foot" target="#foot_8">9</ref> library.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="4.2.">Results</head><p>We generated tests for the previously introduced projects and classes. To reduce the number of generated tests, we recorded only the first execution of each instrumented method. An example of an existing test is shown in Listing 4, and the test that was generated from its execution is shown in Listing 5.</p><p>We now evaluate how the mutation coverage of the existing tests compares to our generated tests. We also look at how the coverage evolves when both existing and generated tests are considered. Our results are reported in Table <ref type="table" target="#tab_1">2</ref>.     The reason for failed tests is that there are still some objects that are not serializable by our library, such as closures. The DataFrame project has a higher number of failed tests compared to LabelContractor. This difference can be attributed to the greater complexity of the DataFrame project, which uses more objects that are currently not serializable.</p><p>For DataFrame, the mutation coverage achieved by the generated tests is lower than that of the existing tests (43% compared to 59%). However, when both existing and generated tests are combined, the mutation coverage improves to 64%. For LabelContractor, the mutation coverage achieved by the generated tests is 43%, lower than the existing test coverage of 56%. When combined, the mutation coverage also improves to 59%.</p><p>We can see that more mutants are killed by existing tests than by generated tests. This can be explained by the fact that existing tests often use auxiliary methods to initialize test values during the setup phase. In contrast, the generated tests rely on a structural reconstruction approach based solely on constructors (new in Pharo) and accessors, or on reflection. 
During the experiment, mutations were generated for entire classes rather than for specific methods, so existing tests were more likely to encounter and kill a mutation because they execute methods more often and with different arguments.</p><p>These results indicate that the combination of generated and existing tests leads to higher mutation coverage for both projects. The increase can be attributed to the use of structural equality between actual and expected results in the generated tests. This exhaustive recursive comparison helps to identify and kill more mutants than a standard equality check.</p><p>A threat to the validity of the generated tests is their reliance on execution traces. These traces are derived from specific scenarios, and the coverage and effectiveness of the generated tests are inherently tied to the completeness of those scenarios. If the execution traces do not cover relevant code paths or edge cases, the generated tests will also lack coverage in these areas.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="5.">Related Works</head><p>EvoSuite <ref type="bibr" target="#b2">[3]</ref> is characterized by its ability to generate JUnit test cases using evolutionary algorithms, with a specific focus on Java. One of its strengths is achieving high levels of code coverage, including branch and line coverage. However, its generated unit tests often have a distinct style that differs from human-written tests, which can affect their readability <ref type="bibr" target="#b9">[10]</ref>. SmallEvoTest <ref type="bibr" target="#b10">[11]</ref> generates unit tests for dynamically typed programming languages, specifically Pharo and GToolkit, by using a type-profiling mechanism and a genetic algorithm to evolve the unit tests. In contrast to these language-specific, evolutionary algorithm-driven approaches, our approach aims to be language-agnostic and uses execution traces to generate tests. We also focus on generating code that is more comprehensible for humans.</p><p>Several research studies have explored the use of execution traces for software testing, recognizing the valuable insight they provide into the behavior of a program at runtime. One web testing approach generates test cases from user execution traces <ref type="bibr" target="#b11">[12]</ref>. To improve the test suite, mutation operators were applied to these test cases, simulating potential real-world failures. Tests that yielded different results were kept because they revealed additional behavior in the web application being tested. Techniques such as Daikon's invariant inference, which identifies likely invariants from execution traces, demonstrate the effectiveness of trace-based testing <ref type="bibr" target="#b12">[13]</ref>. In the future, we could use similar methods to identify interesting test scenarios from traces.</p><p>In recent years, test generation tools using deep learning have attracted considerable interest. 
Among these tools, AthenaTest <ref type="bibr" target="#b3">[4]</ref> stands out for its ability to generate unit test cases for Java programs by learning from actual methods and developer-written tests. Developer surveys indicate that AthenaTest outperforms other tools such as EvoSuite in both test coverage and readability. Building on AthenaTest, A3Test <ref type="bibr" target="#b4">[5]</ref> introduces improvements by integrating assertion knowledge and ensuring consistency in naming and test signatures, resulting in improved correctness and method coverage. CodeT <ref type="bibr" target="#b13">[14]</ref> presents a method that uses pre-trained language models to automatically generate test cases to evaluate the quality and correctness of code solutions. Despite these advances, deep learning-based tools still face notable challenges because they require extensive training data and significant computational resources.</p></div>
<div xmlns="http://www.tei-c.org/ns/1.0"><head n="6.">Conclusion</head><p>In this paper, we introduced Modest, a language-agnostic approach to test generation that uses metamodels to generate unit tests. This approach ensures non-regression by replaying scenarios captured by execution traces. Finally, we showed how Modest can be applied to Pharo by generating unit tests for two projects.</p><p>Looking ahead, several avenues for further development of Modest are possible. These include experimenting with trace selection and mutation <ref type="bibr" target="#b11">[12]</ref>, mining for invariants <ref type="bibr" target="#b12">[13]</ref>, optimizing the generated test suite through coverage modeling, and pruning recreated objects to focus on relevant data. In addition, we plan to evaluate our approach on a larger scale to better understand its effectiveness and applicability. A key aspect of future work will be the criteria for selecting relevant scenarios or traces, which are currently determined by the user. By addressing these areas, we aim to further refine Modest and increase its utility in managing regression in software systems.</p></div><figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_0"><head>Figure 1 :</head><label>1</label><figDesc>Figure 1: The 5 steps of the Modest approach. Entities representing the code to be tested are shown in green (left column), entities representing runtime information are shown in orange (middle column), and entities representing the generated tests are shown in blue (right column).</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_1"><head>8 "</head><label>8</label><figDesc></figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_2"><head>Listing 3 :</head><label>3</label><figDesc>Generated code recreating the object from Listing 1.1 (user := User new)</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_3"><head>Listing 4 :testTokenize 2 3 |</head><label>43</label><figDesc>Existing test from the tokenizer class of the LabelContractor project. LbCTokenizer new tokenize: 'CK123J') 5 equals: #( 'C' 'K123' 'J' ) asOrderedCollection Listing 5: Test generated from the execution trace of Listing 4. 1 expected aString lbCTokenizer actual | 4 expected := OrderedCollection withAll: { 'C'. 'K123'. 'J' }.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_6"><head>7 actual:</head><label>7</label><figDesc>= lbCTokenizer tokenize: aString.</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" xml:id="fig_7"><head>8</head><label></label><figDesc>self assert: actual equals: expected</figDesc></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_0"><head>Table 1</head><label>1</label><figDesc>Selected Pharo projects and the number of evaluated classes. The table shows the number of methods, existing tests, and executable comments for the selected classes. It also shows the number of methods covered by tests and comments, representing the methods for which tests were generated.</figDesc><table><row><cell>1-8</cell></row></table></figure>
<figure xmlns="http://www.tei-c.org/ns/1.0" type="table" xml:id="tab_1"><head>Table 2</head><label>2</label><figDesc>Generated tests for selected Pharo projects and their results. The table shows the number of generated tests, the number of tests that passed and failed, and the mutation coverage achieved by these tests. The combined mutation coverage indicates the coverage when both existing and generated tests are evaluated together.</figDesc><table /></figure>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="1" xml:id="foot_0">https://moosetechnology.org/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="2" xml:id="foot_1">https://github.com/Gabriel-Darbord/opentelemetry-pharo</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="3" xml:id="foot_2">https://opentelemetry.io/</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="4" xml:id="foot_3">https://github.com/svenvc/ston</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="5" xml:id="foot_4">https://github.com/FasterXML/jackson</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="6" xml:id="foot_5">https://github.com/Modest-Project/PharoJackson</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="7" xml:id="foot_6">https://github.com/PolyMathOrg/DataFrame</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="8" xml:id="foot_7">https://github.com/moosetechnology/LabelContractor</note>
			<note xmlns="http://www.tei-c.org/ns/1.0" place="foot" n="9" xml:id="foot_8">https://github.com/pharo-contributions/mutalk</note>
		</body>
		<back>
			<div type="references">

				<listBibl>

<biblStruct xml:id="b0">
	<analytic>
		<title level="a" type="main">A survey of unit testing practices</title>
		<author>
			<persName><forename type="first">P</forename><surname>Runeson</surname></persName>
		</author>
		<idno type="DOI">10.1109/MS.2006.91</idno>
	</analytic>
	<monogr>
		<title level="j">IEEE Software</title>
		<imprint>
			<biblScope unit="volume">23</biblScope>
			<date type="published" when="2006">2006</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b1">
	<analytic>
		<title level="a" type="main">Are there any unit tests? an empirical study on unit testing in open source python projects</title>
		<author>
			<persName><forename type="first">F</forename><surname>Trautsch</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Grabowski</surname></persName>
		</author>
		<idno type="DOI">10.1109/ICST.2017.26</idno>
	</analytic>
	<monogr>
		<title level="m">2017 IEEE International Conference on Software Testing, Verification and Validation (ICST)</title>
				<imprint>
			<date type="published" when="2017">2017</date>
			<biblScope unit="page" from="207" to="218" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b2">
	<analytic>
		<title level="a" type="main">Evosuite: Automatic test suite generation for object-oriented software</title>
		<author>
			<persName><forename type="first">G</forename><surname>Fraser</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Arcuri</surname></persName>
		</author>
		<idno type="DOI">10.1145/2025113.2025179</idno>
		<idno>doi:10.1145/2025113.2025179</idno>
		<ptr target="https://doi.org/10.1145/2025113.2025179" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 19th ACM SIGSOFT Symposium and the 13th European Conference on Foundations of Software Engineering, ESEC/FSE &apos;11</title>
				<meeting>the 19th ACM SIGSOFT Symposium and the 13th European Conference on Foundations of Software Engineering, ESEC/FSE &apos;11<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2011">2011</date>
			<biblScope unit="page" from="416" to="419" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b3">
	<monogr>
		<title level="m" type="main">Unit test case generation with transformers and focal context</title>
		<author>
			<persName><forename type="first">M</forename><surname>Tufano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Drain</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Svyatkovskiy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><forename type="middle">K</forename><surname>Deng</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Sundaresan</surname></persName>
		</author>
		<idno>CoRR abs/2009.05617</idno>
		<ptr target="https://arxiv.org/abs/2009.05617" />
		<imprint>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b4">
	<monogr>
		<author>
			<persName><forename type="first">S</forename><surname>Alagarsamy</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Tantithamthavorn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Aleti</surname></persName>
		</author>
		<idno type="arXiv">arXiv:2302.10352</idno>
		<title level="m">A3test: Assertion-augmented automated test case generation</title>
				<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b5">
	<analytic>
		<title level="a" type="main">Modeling readability to improve unit tests</title>
		<author>
			<persName><forename type="first">E</forename><surname>Daka</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Campos</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Fraser</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><surname>Dorn</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Weimer</surname></persName>
		</author>
		<idno type="DOI">10.1145/2786805.2786838</idno>
		<ptr target="https://doi.org/10.1145/2786805.2786838" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2015 10th Joint Meeting on Foundations of Software Engineering, ESEC/FSE 2015</title>
				<meeting>the 2015 10th Joint Meeting on Foundations of Software Engineering, ESEC/FSE 2015<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2015">2015</date>
			<biblScope unit="page" from="107" to="118" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b6">
	<analytic>
		<title level="a" type="main">A unit test metamodel for test generation</title>
		<author>
			<persName><forename type="first">G</forename><surname>Darbord</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Etien</surname></persName>
		</author>
		<author>
			<persName><forename type="first">N</forename><surname>Anquetil</surname></persName>
		</author>
		<author>
			<persName><forename type="first">B</forename><surname>Verhaeghe</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Derras</surname></persName>
		</author>
		<ptr target="https://hal.science/hal-04219649" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 2023 International Workshop on Smalltalk Technologies, CEUR Workshop Proceedings</title>
				<meeting>the 2023 International Workshop on Smalltalk Technologies, CEUR Workshop Proceedings</meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b7">
	<monogr>
		<title level="m" type="main">Sub-method Structural and Behavioral Reflection</title>
		<author>
			<persName><forename type="first">M</forename><surname>Denker</surname></persName>
		</author>
		<imprint>
			<date type="published" when="2008">2008</date>
		</imprint>
		<respStmt>
			<orgName>University of Bern</orgName>
		</respStmt>
	</monogr>
	<note type="report_type">PhD thesis</note>
</biblStruct>

<biblStruct xml:id="b8">
	<analytic>
		<title level="a" type="main">Sub-method, partial behavioral reflection with reflectivity: Looking back on 10 years of use</title>
		<author>
			<persName><forename type="first">S</forename><surname>Costiou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">V</forename><surname>Aranega</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><surname>Denker</surname></persName>
		</author>
		<idno type="DOI">10.22152/programming-journal.org/2020/4/5</idno>
	</analytic>
	<monogr>
		<title level="j">The Art, Science, and Engineering of Programming</title>
		<imprint>
			<biblScope unit="volume">4</biblScope>
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b9">
	<analytic>
		<title level="a" type="main">An empirical investigation on the readability of manual and generated test cases</title>
		<author>
			<persName><forename type="first">G</forename><surname>Grano</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Scalabrino</surname></persName>
		</author>
		<author>
			<persName><forename type="first">H</forename><forename type="middle">C</forename><surname>Gall</surname></persName>
		</author>
		<author>
			<persName><forename type="first">R</forename><surname>Oliveto</surname></persName>
		</author>
		<idno type="DOI">10.1145/3196321.3196363</idno>
		<idno>doi:10.1145/3196321.3196363</idno>
		<ptr target="https://doi.org/10.1145/3196321.3196363" />
	</analytic>
	<monogr>
		<title level="m">Proceedings of the 26th Conference on Program Comprehension, ICPC &apos;18</title>
				<meeting>the 26th Conference on Program Comprehension, ICPC &apos;18<address><addrLine>New York, NY, USA</addrLine></address></meeting>
		<imprint>
			<publisher>Association for Computing Machinery</publisher>
			<date type="published" when="2018">2018</date>
			<biblScope unit="page" from="348" to="351" />
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b10">
	<analytic>
		<title level="a" type="main">SmallEvoTest: Genetically created unit tests for smalltalk</title>
		<author>
			<persName><forename type="first">A</forename><surname>Bergel</surname></persName>
		</author>
		<author>
			<persName><forename type="first">G</forename><surname>Galindo-Gutiérrez</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Fernandez-Blanco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J.-P</forename><surname>Sandoval-Alcocer</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="m">Proceedings of the International Workshop on Smalltalk Technologies, CEUR Workshop Proceedings</title>
				<meeting>the International Workshop on Smalltalk Technologies, CEUR Workshop Proceedings</meeting>
		<imprint>
			<date type="published" when="2023">2023</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b11">
	<analytic>
		<title level="a" type="main">Test case generation based on mutations over user execution traces</title>
		<author>
			<persName><forename type="first">A</forename><forename type="middle">C R</forename><surname>Paiva</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Restivo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Almeida</surname></persName>
		</author>
	</analytic>
	<monogr>
		<title level="j">Software Quality Journal</title>
		<imprint>
			<biblScope unit="volume">28</biblScope>
			<biblScope unit="page" from="1173" to="1186" />
			<date type="published" when="2020">2020</date>
		</imprint>
	</monogr>
</biblStruct>

<biblStruct xml:id="b12">
	<analytic>
		<title level="a" type="main">The Daikon system for dynamic detection of likely invariants</title>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">D</forename><surname>Ernst</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J</forename><forename type="middle">H</forename><surname>Perkins</surname></persName>
		</author>
		<author>
			<persName><forename type="first">P</forename><forename type="middle">J</forename><surname>Guo</surname></persName>
		</author>
		<author>
			<persName><forename type="first">S</forename><surname>Mccamant</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Pacheco</surname></persName>
		</author>
		<author>
			<persName><forename type="first">M</forename><forename type="middle">S</forename><surname>Tschantz</surname></persName>
		</author>
		<author>
			<persName><forename type="first">C</forename><surname>Xiao</surname></persName>
		</author>
		<idno type="DOI">10.1016/j.scico.2007.01.015</idno>
		<ptr target="https://doi.org/10.1016/j.scico.2007.01.015" />
	</analytic>
	<monogr>
		<title level="j">Science of Computer Programming</title>
		<imprint>
			<biblScope unit="volume">69</biblScope>
			<biblScope unit="page" from="35" to="45" />
			<date type="published" when="2007">2007</date>
		</imprint>
	</monogr>
	<note>special issue on Experimental Software and Toolkits</note>
</biblStruct>

<biblStruct xml:id="b13">
	<monogr>
		<title level="m" type="main">CodeT: Code generation with generated tests</title>
		<author>
			<persName><forename type="first">B</forename><surname>Chen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">F</forename><surname>Zhang</surname></persName>
		</author>
		<author>
			<persName><forename type="first">A</forename><surname>Nguyen</surname></persName>
		</author>
		<author>
			<persName><forename type="first">D</forename><surname>Zan</surname></persName>
		</author>
		<author>
			<persName><forename type="first">Z</forename><surname>Lin</surname></persName>
		</author>
		<author>
			<persName><forename type="first">J.-G</forename><surname>Lou</surname></persName>
		</author>
		<author>
			<persName><forename type="first">W</forename><surname>Chen</surname></persName>
		</author>
		<idno type="DOI">10.48550/ARXIV.2207.10397</idno>
		<ptr target="https://arxiv.org/abs/2207.10397" />
		<imprint>
			<date type="published" when="2022">2022</date>
		</imprint>
	</monogr>
</biblStruct>

				</listBibl>
			</div>
		</back>
	</text>
</TEI>
