Skip to content

Commit

Permalink
ICSA25 Content: TransArC via LLMs
Browse files Browse the repository at this point in the history
  • Loading branch information
dfuchss committed Dec 17, 2024
1 parent b49f34e commit 1fe9f00
Show file tree
Hide file tree
Showing 15 changed files with 1,024 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,4 @@ pipeline/src/main/resources/evaluations/
.idea

*.iml
tests/integration-tests/tests-tlr/cache-llm
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.tlr.execution;

import java.io.File;
import java.util.SortedMap;

import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;
import edu.kit.kastel.mcse.ardoco.core.common.util.DataRepositoryHelper;
import edu.kit.kastel.mcse.ardoco.core.execution.ArDoCo;
import edu.kit.kastel.mcse.ardoco.core.execution.runner.ArDoCoRunner;
import edu.kit.kastel.mcse.ardoco.tlr.codetraceability.SadSamCodeTraceabilityLinkRecovery;
import edu.kit.kastel.mcse.ardoco.tlr.codetraceability.SamCodeTraceabilityLinkRecovery;
import edu.kit.kastel.mcse.ardoco.tlr.connectiongenerator.ConnectionGenerator;
import edu.kit.kastel.mcse.ardoco.tlr.models.agents.ArCoTLModelProviderAgent;
import edu.kit.kastel.mcse.ardoco.tlr.models.agents.LLMArchitectureProviderAgent;
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LLMArchitecturePrompt;
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LargeLanguageModel;
import edu.kit.kastel.mcse.ardoco.tlr.recommendationgenerator.RecommendationGenerator;
import edu.kit.kastel.mcse.ardoco.tlr.text.providers.TextPreprocessingAgent;
import edu.kit.kastel.mcse.ardoco.tlr.textextraction.TextExtraction;

/**
 * Runner that wires up a pipeline which reads the documentation text and the code, lets an LLM-based agent
 * provide the architecture, and then registers the SAM-Code and SAD-SAM-Code traceability link recovery steps.
 */
public class ArDoCoForSadSamViaLlmCodeTraceabilityLinkRecovery extends ArDoCoRunner {

    /**
     * Creates the runner for the given project.
     *
     * @param projectName the name of the project, forwarded to {@link ArDoCoRunner}
     */
    public ArDoCoForSadSamViaLlmCodeTraceabilityLinkRecovery(String projectName) {
        super(projectName);
    }

    /**
     * Defines the pipeline, sets the output directory and marks this runner as set up.
     *
     * @param inputText                     the file containing the documentation text
     * @param inputCode                     the code input used to build the code configuration
     * @param additionalConfigs             additional configuration passed to every pipeline step
     * @param outputDir                     the directory that is set as output directory
     * @param largeLanguageModel            the LLM handed to the architecture provider agent
     * @param documentationExtractionPrompt the prompt handed to the agent for the documentation
     * @param codeExtractionPrompt          the prompt handed to the agent for the code
     * @param codeFeatures                  the code features handed to the agent
     * @param aggregationPrompt             the prompt handed to the agent for aggregation
     */
    public void setUp(File inputText, File inputCode, SortedMap<String, String> additionalConfigs, File outputDir, LargeLanguageModel largeLanguageModel,
            LLMArchitecturePrompt documentationExtractionPrompt, LLMArchitecturePrompt codeExtractionPrompt, LLMArchitecturePrompt.Features codeFeatures,
            LLMArchitecturePrompt aggregationPrompt) {
        this.definePipeline(inputText, inputCode, additionalConfigs, largeLanguageModel, documentationExtractionPrompt, codeExtractionPrompt,
                codeFeatures, aggregationPrompt);
        this.setOutputDirectory(outputDir);
        this.isSetUp = true;
    }

    /**
     * Stores the input text in the data repository and registers all pipeline steps in their execution order.
     *
     * @throws IllegalArgumentException if the text read from {@code inputText} is blank
     */
    private void definePipeline(File inputText, File inputCode, SortedMap<String, String> additionalConfigs, LargeLanguageModel largeLanguageModel,
            LLMArchitecturePrompt documentationExtractionPrompt, LLMArchitecturePrompt codeExtractionPrompt, LLMArchitecturePrompt.Features codeFeatures,
            LLMArchitecturePrompt aggregationPrompt) {
        var pipeline = this.getArDoCo();
        var repository = pipeline.getDataRepository();

        var documentation = CommonUtilities.readInputText(inputText);
        if (documentation.isBlank()) {
            throw new IllegalArgumentException("Cannot deal with empty input text. Maybe there was an error reading the file.");
        }
        DataRepositoryHelper.putInputText(repository, documentation);

        // Step 1: text preprocessing.
        pipeline.addPipelineStep(TextPreprocessingAgent.get(additionalConfigs, repository));

        // Step 2: code model only; no architecture configuration is passed (null).
        var codeConfig = ArCoTLModelProviderAgent.getCodeConfiguration(inputCode);
        pipeline.addPipelineStep(ArCoTLModelProviderAgent.getArCoTLModelProviderAgent(repository, additionalConfigs, null, codeConfig));

        // Step 3: LLM-based architecture provider.
        pipeline.addPipelineStep(new LLMArchitectureProviderAgent(repository, largeLanguageModel, documentationExtractionPrompt, codeExtractionPrompt,
                codeFeatures, aggregationPrompt));

        // Steps 4-6: text extraction, recommendation generation, connection generation.
        pipeline.addPipelineStep(TextExtraction.get(additionalConfigs, repository));
        pipeline.addPipelineStep(RecommendationGenerator.get(additionalConfigs, repository));
        pipeline.addPipelineStep(ConnectionGenerator.get(additionalConfigs, repository));

        // Steps 7-8: SAM-Code followed by SAD-SAM-Code traceability link recovery.
        pipeline.addPipelineStep(SamCodeTraceabilityLinkRecovery.get(additionalConfigs, repository));
        pipeline.addPipelineStep(SadSamCodeTraceabilityLinkRecovery.get(additionalConfigs, repository));
    }
}
24 changes: 24 additions & 0 deletions stages-tlr/model-provider/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
</parent>
<artifactId>model-provider</artifactId>

<properties>
<langchain4j.version>0.33.0</langchain4j.version>
</properties>

<dependencies>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
Expand All @@ -33,6 +37,26 @@
<artifactId>commons-io</artifactId>
<version>2.15.1</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-ollama</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-open-ai</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>io.github.ardoco.core</groupId>
<artifactId>common</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
import edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.Extractor;
import edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.architecture.pcm.PcmExtractor;
import edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.architecture.raw.RawArchitectureExtractor;
import edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.architecture.uml.UmlExtractor;

public record ArchitectureConfiguration(File architectureFile, ArchitectureModelType type) {
Expand All @@ -22,7 +23,7 @@ public Extractor extractor() {
return switch (type) {
case PCM -> new PcmExtractor(architectureFile.getAbsolutePath());
case UML -> new UmlExtractor(architectureFile.getAbsolutePath());
case RAW -> throw new IllegalArgumentException("Raw model is not supported for this project.");
case RAW -> new RawArchitectureExtractor(architectureFile.getAbsolutePath());
};
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/* Licensed under MIT 2024. */
package edu.kit.kastel.mcse.ardoco.tlr.models.agents;

import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.data.DataRepository;
import edu.kit.kastel.mcse.ardoco.core.pipeline.agent.PipelineAgent;
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LLMArchitecturePrompt;
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LLMArchitectureProviderInformant;
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LargeLanguageModel;

/**
 * Pipeline agent that consists of a single {@link LLMArchitectureProviderInformant}.
 */
public class LLMArchitectureProviderAgent extends PipelineAgent {

    /**
     * Creates the agent with one informant that is configured with the given model and prompts.
     *
     * @param dataRepository                the data repository shared with the informant
     * @param largeLanguageModel            the LLM handed to the informant
     * @param documentationExtractionPrompt the prompt handed to the informant for the documentation
     * @param codeExtractionPrompt          the prompt handed to the informant for the code
     * @param codeFeatures                  the code features handed to the informant
     * @param aggregationPrompt             the prompt handed to the informant for aggregation
     */
    public LLMArchitectureProviderAgent(DataRepository dataRepository, LargeLanguageModel largeLanguageModel,
            LLMArchitecturePrompt documentationExtractionPrompt, LLMArchitecturePrompt codeExtractionPrompt, LLMArchitecturePrompt.Features codeFeatures,
            LLMArchitecturePrompt aggregationPrompt) {
        super(List.of(createInformant(dataRepository, largeLanguageModel, documentationExtractionPrompt, codeExtractionPrompt, codeFeatures,
                aggregationPrompt)), LLMArchitectureProviderAgent.class.getSimpleName(), dataRepository);
    }

    /** Builds the only informant of this agent from the constructor arguments. */
    private static LLMArchitectureProviderInformant createInformant(DataRepository dataRepository, LargeLanguageModel largeLanguageModel,
            LLMArchitecturePrompt documentationExtractionPrompt, LLMArchitecturePrompt codeExtractionPrompt, LLMArchitecturePrompt.Features codeFeatures,
            LLMArchitecturePrompt aggregationPrompt) {
        return new LLMArchitectureProviderInformant(dataRepository, largeLanguageModel, documentationExtractionPrompt, codeExtractionPrompt, codeFeatures,
                aggregationPrompt);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/* Licensed under MIT 2024. */
package edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.architecture.raw;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.TreeSet;
import java.util.UUID;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
import edu.kit.kastel.mcse.ardoco.core.api.models.ModelType;
import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel;
import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.architecture.ArchitectureComponent;
import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.architecture.ArchitectureItem;
import edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.architecture.ArchitectureExtractor;

/**
 * An extractor for raw architecture models. Expected format:
 * <p>
 * Name1::ID<br>
 * Name2::ID
 * </p>
 * Blank lines are ignored. If a line has no {@code ::ID} part, or the ID part is blank, an ID of the form
 * {@code <name>_<random UUID>} is generated for that line.
 */
public class RawArchitectureExtractor extends ArchitectureExtractor {
    private static final Logger logger = LoggerFactory.getLogger(RawArchitectureExtractor.class);

    private static final String SEPARATOR = "::";

    /**
     * Creates the extractor.
     *
     * @param modelPath the path to the raw architecture model file
     */
    public RawArchitectureExtractor(String modelPath) {
        super(modelPath);
    }

    /**
     * Reads the model file line by line and creates one component per non-blank line.
     *
     * @return the architecture model containing all parsed components
     * @throws UncheckedIOException if the model file cannot be read
     */
    @Override
    public ArchitectureModel extractModel() {
        List<String> lines;
        try {
            lines = Files.readAllLines(Path.of(this.path));
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }

        List<ArchitectureItem> components = new ArrayList<>();
        for (String line : lines) {
            if (line.isBlank()) {
                continue;
            }
            // Limit of 2: everything after the first separator belongs to the ID.
            String[] parts = line.split(SEPARATOR, 2);
            String name = parts[0].trim();
            String id = parts.length == 2 ? parts[1].trim() : "";
            if (id.isEmpty()) {
                // Covers both "Name" and "Name::" so that no component ends up with an empty (and thus colliding) ID.
                logger.warn("Line has no ID. Using random ID: {}", line);
                id = name + "_" + UUID.randomUUID();
            }
            components.add(new ArchitectureComponent(name, id, new TreeSet<>(), new TreeSet<>(), new TreeSet<>(), "component"));
        }

        return new ArchitectureModel(components);
    }

    @Override
    public ModelType getModelType() {
        return ArchitectureModelType.RAW;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* Licensed under MIT 2024. */
package edu.kit.kastel.mcse.ardoco.tlr.models.informants;

import static edu.kit.kastel.mcse.ardoco.core.common.JsonHandling.createObjectMapper;

import java.io.File;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.core.StreamReadConstraints;
import com.fasterxml.jackson.core.StreamWriteConstraints;
import com.fasterxml.jackson.databind.ObjectMapper;

import edu.kit.kastel.mcse.ardoco.core.common.JsonHandling;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.type.TypeReference;

import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.output.Response;
import edu.kit.kastel.mcse.ardoco.core.architecture.Deterministic;

/**
 * A {@link ChatLanguageModel} decorator that stores the text of every response in a JSON file and answers
 * repeated requests from that file instead of calling the wrapped model again.
 * <p>
 * The cache is keyed by {@code messages.toString()} and is persisted to
 * {@code cache-llm/<cacheKey>-cache.json} after every request that was not served from the cache.
 * </p>
 */
@Deterministic
public class CachedChatLanguageModel implements ChatLanguageModel {

    private static final Logger logger = LoggerFactory.getLogger(CachedChatLanguageModel.class);

    private static final String CACHE_DIR = "cache-llm/";
    static {
        new File(CACHE_DIR).mkdirs();
    }

    private final ChatLanguageModel chatLanguageModel;
    private final String cacheKey;
    private final File cacheFile;
    // Created once per instance instead of once per read/write.
    private final ObjectMapper objectMapper = createObjectMapper();

    private final Map<String, String> cache;

    /**
     * Creates the caching wrapper and loads an existing cache file if one can be read.
     *
     * @param chatLanguageModel the model that is asked whenever a request is not cached
     * @param cacheKey          the key that selects the cache file
     */
    public CachedChatLanguageModel(ChatLanguageModel chatLanguageModel, String cacheKey) {
        this.chatLanguageModel = chatLanguageModel;
        this.cacheKey = cacheKey;
        this.cacheFile = new File(CACHE_DIR + cacheKey + "-cache.json");

        Map<String, String> loaded = null;
        try {
            loaded = objectMapper.readValue(cacheFile, new TypeReference<Map<String, String>>() {
            });
        } catch (IOException e) {
            logger.debug("Could not read cache file", e);
        }
        // A file that only contains the JSON literal null deserializes to null; start with an empty cache then.
        this.cache = loaded != null ? loaded : new LinkedHashMap<>();
    }

    /**
     * Returns the cached response for the given messages or asks the wrapped model and caches its answer.
     *
     * @param messages the chat messages; their string representation is the cache key
     * @return a response built from the cached text, or the response of the wrapped model
     */
    @Override
    public Response<AiMessage> generate(List<ChatMessage> messages) {
        String key = messages.toString();
        String cachedText = cache.get(key);
        // A null entry is treated as a miss and the wrapped model is asked again.
        if (cachedText != null) {
            return Response.from(new AiMessage(cachedText));
        }

        Response<AiMessage> response = chatLanguageModel.generate(messages);
        cache.put(key, response.content().text());
        try {
            objectMapper.writeValue(cacheFile, cache);
        } catch (IOException e) {
            // Best effort: the response is still returned even if it could not be persisted.
            logger.error("Could not write cache file", e);
        }
        return response;
    }

    /** Creates the project's object mapper with the maximum JSON name length raised to 100000 for reading. */
    private static ObjectMapper createObjectMapper() {
        ObjectMapper oom = JsonHandling.createObjectMapper();
        oom.getFactory().setStreamReadConstraints(StreamReadConstraints.builder().maxNameLength(100000).build());
        return oom;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/* Licensed under MIT 2024. */
package edu.kit.kastel.mcse.ardoco.tlr.models.informants;

import java.util.List;
import java.util.Locale;

/**
 * Prompt templates used to ask an LLM for component names. Each constant holds one or more templates;
 * {@code %s} marks the position of the payload and <code>{FEATURES}</code> the position of the described code
 * {@link Features}.
 */
public enum LLMArchitecturePrompt {
    DOCUMENTATION_ONLY_V1(
            """
            Your task is to identify the high-level components based on a software architecture documentation. In a first step, you shall elaborate on the following documentation:
            %s
            """,
            """
            Now provide a list that only covers the component names in camel case. Omit common prefixes and suffixes.
            Output format:
            - Name1
            - Name2
            """), //
    CODE_ONLY_V1(
            """
            You get the {FEATURES} of a software project. Your task is to summarize the {FEATURES} w.r.t. the high-level architecture of the system. Try to identify possible components.
            {FEATURES}:
            %s
            """,
            """
            Now provide a list that only covers the component names. Omit common prefixes and suffixes in the names in camel case.
            Output format:
            - Name1
            - Name2
            """), //
    AGGREGATION_V1("""
            You get a list of possible component names. Your task is to aggregate the list and remove duplicates.
            Omit common prefixes and suffixes in the names in camel case.
            Output format:
            - Name1
            - Name2
            Possible component names:
            %s
            """);

    private static final String FEATURES_PLACEHOLDER = "{FEATURES}";

    private final List<String> templates;

    LLMArchitecturePrompt(String... templates) {
        this.templates = List.of(templates);
    }

    /**
     * Returns the templates of this prompt as they are defined.
     *
     * @return the unmodifiable list of templates
     * @throws IllegalArgumentException if called on {@link #CODE_ONLY_V1}, whose templates need
     *                                  {@link #getTemplates(Features)}
     */
    public List<String> getTemplates() {
        if (this == CODE_ONLY_V1) {
            throw new IllegalArgumentException("This method is not supported for this enum value");
        }
        return templates;
    }

    /**
     * Returns the templates with every <code>{FEATURES}</code> placeholder replaced by the given features.
     *
     * @param features the features whose string representation replaces the placeholder
     * @return the unmodifiable list of resolved templates
     */
    public List<String> getTemplates(Features features) {
        String featureText = features.toString();
        return templates.stream().map(template -> template.replace(FEATURES_PLACEHOLDER, featureText)).toList();
    }

    /**
     * The kind of code information that is described in a prompt.
     */
    public enum Features {
        PACKAGES, PACKAGES_AND_THEIR_CLASSES;

        /**
         * Returns the constant name as readable text, e.g. {@code "Packages and their classes"}.
         */
        @Override
        public String toString() {
            String raw = super.toString();
            // Locale.ROOT keeps the result stable; the default locale may map 'I' to a dotless 'ı' (e.g. Turkish).
            return raw.charAt(0) + raw.substring(1).toLowerCase(Locale.ROOT).replace("_", " ");
        }
    }
}
Loading

0 comments on commit 1fe9f00

Please sign in to comment.