-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
1,024 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,3 +127,4 @@ pipeline/src/main/resources/evaluations/ | |
.idea | ||
|
||
*.iml | ||
tests/integration-tests/tests-tlr/cache-llm |
69 changes: 69 additions & 0 deletions
69
...t/kastel/mcse/ardoco/tlr/execution/ArDoCoForSadSamViaLlmCodeTraceabilityLinkRecovery.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* Licensed under MIT 2023-2024. */ | ||
package edu.kit.kastel.mcse.ardoco.tlr.execution; | ||
|
||
import java.io.File; | ||
import java.util.SortedMap; | ||
|
||
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities; | ||
import edu.kit.kastel.mcse.ardoco.core.common.util.DataRepositoryHelper; | ||
import edu.kit.kastel.mcse.ardoco.core.execution.ArDoCo; | ||
import edu.kit.kastel.mcse.ardoco.core.execution.runner.ArDoCoRunner; | ||
import edu.kit.kastel.mcse.ardoco.tlr.codetraceability.SadSamCodeTraceabilityLinkRecovery; | ||
import edu.kit.kastel.mcse.ardoco.tlr.codetraceability.SamCodeTraceabilityLinkRecovery; | ||
import edu.kit.kastel.mcse.ardoco.tlr.connectiongenerator.ConnectionGenerator; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.agents.ArCoTLModelProviderAgent; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.agents.LLMArchitectureProviderAgent; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LLMArchitecturePrompt; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LargeLanguageModel; | ||
import edu.kit.kastel.mcse.ardoco.tlr.recommendationgenerator.RecommendationGenerator; | ||
import edu.kit.kastel.mcse.ardoco.tlr.text.providers.TextPreprocessingAgent; | ||
import edu.kit.kastel.mcse.ardoco.tlr.textextraction.TextExtraction; | ||
|
||
public class ArDoCoForSadSamViaLlmCodeTraceabilityLinkRecovery extends ArDoCoRunner { | ||
|
||
public ArDoCoForSadSamViaLlmCodeTraceabilityLinkRecovery(String projectName) { | ||
super(projectName); | ||
} | ||
|
||
public void setUp(File inputText, File inputCode, SortedMap<String, String> additionalConfigs, File outputDir, LargeLanguageModel largeLanguageModel, | ||
LLMArchitecturePrompt documentationExtractionPrompt, LLMArchitecturePrompt codeExtractionPrompt, LLMArchitecturePrompt.Features codeFeatures, | ||
LLMArchitecturePrompt aggregationPrompt) { | ||
definePipeline(inputText, inputCode, additionalConfigs, largeLanguageModel, documentationExtractionPrompt, codeExtractionPrompt, codeFeatures, | ||
aggregationPrompt); | ||
setOutputDirectory(outputDir); | ||
isSetUp = true; | ||
} | ||
|
||
private void definePipeline(File inputText, File inputCode, SortedMap<String, String> additionalConfigs, LargeLanguageModel largeLanguageModel, | ||
LLMArchitecturePrompt documentationExtractionPrompt, LLMArchitecturePrompt codeExtractionPrompt, LLMArchitecturePrompt.Features codeFeatures, | ||
LLMArchitecturePrompt aggregationPrompt) { | ||
ArDoCo arDoCo = this.getArDoCo(); | ||
var dataRepository = arDoCo.getDataRepository(); | ||
|
||
var text = CommonUtilities.readInputText(inputText); | ||
if (text.isBlank()) { | ||
throw new IllegalArgumentException("Cannot deal with empty input text. Maybe there was an error reading the file."); | ||
} | ||
DataRepositoryHelper.putInputText(dataRepository, text); | ||
|
||
arDoCo.addPipelineStep(TextPreprocessingAgent.get(additionalConfigs, dataRepository)); | ||
|
||
var codeConfiguration = ArCoTLModelProviderAgent.getCodeConfiguration(inputCode); | ||
|
||
ArCoTLModelProviderAgent arCoTLModelProviderAgent = ArCoTLModelProviderAgent.getArCoTLModelProviderAgent(dataRepository, additionalConfigs, null, | ||
codeConfiguration); | ||
arDoCo.addPipelineStep(arCoTLModelProviderAgent); | ||
|
||
LLMArchitectureProviderAgent llmArchitectureProviderAgent = new LLMArchitectureProviderAgent(dataRepository, largeLanguageModel, | ||
documentationExtractionPrompt, codeExtractionPrompt, codeFeatures, aggregationPrompt); | ||
arDoCo.addPipelineStep(llmArchitectureProviderAgent); | ||
|
||
arDoCo.addPipelineStep(TextExtraction.get(additionalConfigs, dataRepository)); | ||
arDoCo.addPipelineStep(RecommendationGenerator.get(additionalConfigs, dataRepository)); | ||
arDoCo.addPipelineStep(ConnectionGenerator.get(additionalConfigs, dataRepository)); | ||
|
||
arDoCo.addPipelineStep(SamCodeTraceabilityLinkRecovery.get(additionalConfigs, dataRepository)); | ||
|
||
arDoCo.addPipelineStep(SadSamCodeTraceabilityLinkRecovery.get(additionalConfigs, dataRepository)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 20 additions & 0 deletions
20
.../main/java/edu/kit/kastel/mcse/ardoco/tlr/models/agents/LLMArchitectureProviderAgent.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* Licensed under MIT 2024. */ | ||
package edu.kit.kastel.mcse.ardoco.tlr.models.agents; | ||
|
||
import java.util.List; | ||
|
||
import edu.kit.kastel.mcse.ardoco.core.data.DataRepository; | ||
import edu.kit.kastel.mcse.ardoco.core.pipeline.agent.PipelineAgent; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LLMArchitecturePrompt; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LLMArchitectureProviderInformant; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.informants.LargeLanguageModel; | ||
|
||
public class LLMArchitectureProviderAgent extends PipelineAgent { | ||
|
||
public LLMArchitectureProviderAgent(DataRepository dataRepository, LargeLanguageModel largeLanguageModel, | ||
LLMArchitecturePrompt documentationExtractionPrompt, LLMArchitecturePrompt codeExtractionPrompt, LLMArchitecturePrompt.Features codeFeatures, | ||
LLMArchitecturePrompt aggregationPrompt) { | ||
super(List.of(new LLMArchitectureProviderInformant(dataRepository, largeLanguageModel, documentationExtractionPrompt, codeExtractionPrompt, | ||
codeFeatures, aggregationPrompt)), LLMArchitectureProviderAgent.class.getSimpleName(), dataRepository); | ||
} | ||
} |
68 changes: 68 additions & 0 deletions
68
...se/ardoco/tlr/models/connectors/generators/architecture/raw/RawArchitectureExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* Licensed under MIT 2024. */ | ||
package edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.architecture.raw; | ||
|
||
import java.io.IOException; | ||
import java.io.UncheckedIOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.TreeSet; | ||
import java.util.UUID; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType; | ||
import edu.kit.kastel.mcse.ardoco.core.api.models.ModelType; | ||
import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel; | ||
import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.architecture.ArchitectureComponent; | ||
import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.architecture.ArchitectureItem; | ||
import edu.kit.kastel.mcse.ardoco.tlr.models.connectors.generators.architecture.ArchitectureExtractor; | ||
|
||
/** | ||
* An extractor for raw architecture models. Expected format: | ||
* <p> | ||
* Name1::ID<br> | ||
* Name2::ID | ||
* </p> | ||
*/ | ||
public class RawArchitectureExtractor extends ArchitectureExtractor { | ||
private static final Logger logger = LoggerFactory.getLogger(RawArchitectureExtractor.class); | ||
|
||
public RawArchitectureExtractor(String modelPath) { | ||
super(modelPath); | ||
} | ||
|
||
@Override | ||
public ArchitectureModel extractModel() { | ||
try { | ||
List<String> lines = Files.readAllLines(Path.of(this.path)); | ||
|
||
List<ArchitectureItem> components = new ArrayList<>(); | ||
for (String line : lines) { | ||
if (line.isBlank()) { | ||
continue; | ||
} | ||
String[] parts = line.split("::", 2); | ||
if (parts.length != 2) { | ||
logger.warn("Line has no ID. Using random ID: {}", line); | ||
parts = Arrays.copyOf(parts, 2); | ||
parts[1] = parts[0] + "_" + UUID.randomUUID(); | ||
} | ||
components.add(new ArchitectureComponent(parts[0].trim(), parts[1].trim(), new TreeSet<>(), new TreeSet<>(), new TreeSet<>(), "component")); | ||
} | ||
|
||
return new ArchitectureModel(components); | ||
|
||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} | ||
} | ||
|
||
@Override | ||
public ModelType getModelType() { | ||
return ArchitectureModelType.RAW; | ||
} | ||
} |
75 changes: 75 additions & 0 deletions
75
...c/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/CachedChatLanguageModel.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* Licensed under MIT 2024. */ | ||
package edu.kit.kastel.mcse.ardoco.tlr.models.informants; | ||
|
||
import static edu.kit.kastel.mcse.ardoco.core.common.JsonHandling.createObjectMapper; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import com.fasterxml.jackson.core.StreamReadConstraints; | ||
import com.fasterxml.jackson.core.StreamWriteConstraints; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
|
||
import edu.kit.kastel.mcse.ardoco.core.common.JsonHandling; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import com.fasterxml.jackson.core.type.TypeReference; | ||
|
||
import dev.langchain4j.data.message.AiMessage; | ||
import dev.langchain4j.data.message.ChatMessage; | ||
import dev.langchain4j.model.chat.ChatLanguageModel; | ||
import dev.langchain4j.model.output.Response; | ||
import edu.kit.kastel.mcse.ardoco.core.architecture.Deterministic; | ||
|
||
@Deterministic | ||
public class CachedChatLanguageModel implements ChatLanguageModel { | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(CachedChatLanguageModel.class); | ||
|
||
private static final String CACHE_DIR = "cache-llm/"; | ||
static { | ||
new File(CACHE_DIR).mkdirs(); | ||
} | ||
|
||
private final ChatLanguageModel chatLanguageModel; | ||
private final String cacheKey; | ||
|
||
private Map<String, String> cache = new LinkedHashMap<>(); | ||
|
||
public CachedChatLanguageModel(ChatLanguageModel chatLanguageModel, String cacheKey) { | ||
this.chatLanguageModel = chatLanguageModel; | ||
this.cacheKey = cacheKey; | ||
try { | ||
this.cache = createObjectMapper().readValue(new File(CACHE_DIR + cacheKey + "-cache.json"), new TypeReference<>() { | ||
}); | ||
} catch (IOException e) { | ||
logger.debug("Could not read cache file", e); | ||
} | ||
} | ||
|
||
@Override | ||
public Response<AiMessage> generate(List<ChatMessage> messages) { | ||
if (cache.containsKey(messages.toString())) { | ||
return Response.from(new AiMessage(cache.get(messages.toString()))); | ||
} | ||
Response<AiMessage> response = chatLanguageModel.generate(messages); | ||
cache.put(messages.toString(), response.content().text()); | ||
try { | ||
createObjectMapper().writeValue(new File(CACHE_DIR + cacheKey + "-cache.json"), cache); | ||
} catch (IOException e) { | ||
logger.error("Could not write cache file", e); | ||
} | ||
return response; | ||
} | ||
|
||
private static ObjectMapper createObjectMapper() { | ||
ObjectMapper oom = JsonHandling.createObjectMapper(); | ||
oom.getFactory().setStreamReadConstraints(StreamReadConstraints.builder().maxNameLength(100000).build()); | ||
return oom; | ||
} | ||
} |
69 changes: 69 additions & 0 deletions
69
...src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitecturePrompt.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* Licensed under MIT 2024. */ | ||
package edu.kit.kastel.mcse.ardoco.tlr.models.informants; | ||
|
||
import java.util.List; | ||
|
||
/**
 * Prompt templates. Each constant holds one or more templates; the templates of {@link #CODE_ONLY_V1} contain the
 * placeholder {@code {FEATURES}}, which {@link #getTemplates(Features)} replaces.
 */
public enum LLMArchitecturePrompt {
    DOCUMENTATION_ONLY_V1(
            """
            Your task is to identify the high-level components based on a software architecture documentation. In a first step, you shall elaborate on the following documentation:
            %s
            """,
            """
            Now provide a list that only covers the component names in camel case. Omit common prefixes and suffixes.
            Output format:
            - Name1
            - Name2
            """), //
    CODE_ONLY_V1(
            """
            You get the {FEATURES} of a software project. Your task is to summarize the {FEATURES} w.r.t. the high-level architecture of the system. Try to identify possible components.
            {FEATURES}:
            %s
            """,
            """
            Now provide a list that only covers the component names. Omit common prefixes and suffixes in the names in camel case.
            Output format:
            - Name1
            - Name2
            """), //
    AGGREGATION_V1("""
            You get a list of possible component names. Your task is to aggregate the list and remove duplicates.
            Omit common prefixes and suffixes in the names in camel case.
            Output format:
            - Name1
            - Name2
            Possible component names:
            %s
            """);

    private final List<String> templates;

    LLMArchitecturePrompt(String... templates) {
        this.templates = List.of(templates);
    }

    /**
     * Returns the templates unchanged.
     *
     * @return the unmodifiable list of templates
     * @throws IllegalArgumentException if called on {@link #CODE_ONLY_V1}; use {@link #getTemplates(Features)} there
     */
    public List<String> getTemplates() {
        if (this == CODE_ONLY_V1) {
            throw new IllegalArgumentException("This method is not supported for this enum value");
        }
        return templates;
    }

    /**
     * Returns the templates with every {@code {FEATURES}} placeholder replaced by the textual form of the features.
     *
     * @param features the features whose {@link Features#toString()} is inserted
     * @return the templates after replacement
     */
    public List<String> getTemplates(Features features) {
        String featureText = features.toString();
        return templates.stream().map(it -> it.replace("{FEATURES}", featureText)).toList();
    }

    public enum Features {
        PACKAGES, PACKAGES_AND_THEIR_CLASSES;

        /**
         * Returns the constant name in sentence case with underscores as spaces, e.g. {@code "Packages and their classes"}.
         */
        @Override
        public String toString() {
            String constantName = super.toString();
            // Locale.ROOT keeps the prompt text identical on every machine (a Turkish default locale would map 'I' to a dotless 'ı').
            return constantName.charAt(0) + constantName.substring(1).toLowerCase(java.util.Locale.ROOT).replace("_", " ");
        }
    }
}
Oops, something went wrong.