Skip to content

Commit

Permalink
Remove abbreviations & Cache
Browse files Browse the repository at this point in the history
  • Loading branch information
dfuchss committed Nov 29, 2024
1 parent b29662a commit 8fb4a7d
Show file tree
Hide file tree
Showing 12 changed files with 225 additions and 1,329 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.tlr.textextraction;

import java.util.LinkedHashSet;
import java.util.List;

import org.eclipse.collections.api.factory.SortedMaps;
import org.eclipse.collections.api.factory.SortedSets;
import org.eclipse.collections.api.list.ImmutableList;
Expand All @@ -14,11 +11,8 @@

import edu.kit.kastel.mcse.ardoco.core.api.stage.textextraction.MappingKind;
import edu.kit.kastel.mcse.ardoco.core.api.stage.textextraction.NounMapping;
import edu.kit.kastel.mcse.ardoco.core.api.stage.textextraction.PhraseAbbreviation;
import edu.kit.kastel.mcse.ardoco.core.api.stage.textextraction.TextState;
import edu.kit.kastel.mcse.ardoco.core.api.stage.textextraction.TextStateStrategy;
import edu.kit.kastel.mcse.ardoco.core.api.stage.textextraction.WordAbbreviation;
import edu.kit.kastel.mcse.ardoco.core.api.text.Phrase;
import edu.kit.kastel.mcse.ardoco.core.api.text.Word;
import edu.kit.kastel.mcse.ardoco.core.architecture.Deterministic;
import edu.kit.kastel.mcse.ardoco.core.data.Confidence;
Expand All @@ -38,20 +32,21 @@ protected DefaultTextStateStrategy(GlobalConfiguration globalConfiguration) {
public void setState(TextState textState) {
// Binds this strategy to a text state. The binding is one-shot: once a state is set, any further call is rejected
// with an IllegalStateException instead of replacing the current state.
if (this.textState != null) {
throw new IllegalStateException("The text state is already set");
// NOTE(review): this is a rendered diff — the `else if` line below and the `}` + `if` pair after it appear to be
// the before/after forms of the same instanceof check, not two separate checks.
} else if (textState instanceof TextStateImpl) {
}
if (textState instanceof TextStateImpl) {
// Only TextStateImpl is accepted; the argument is cast and stored as the concrete type.
this.textState = (TextStateImpl) textState;
} else {
// Any other TextState implementation (or null, which fails instanceof) is rejected.
throw new IllegalArgumentException("The text state must be an instance of TextStateImpl");
}
}

/**
 * Returns the text state held by this strategy.
 *
 * @return the stored {@link TextStateImpl}; presumably {@code null} until {@code setState} has been called — TODO confirm
 *         against the constructor
 */
public TextStateImpl getTextState() {
// NOTE(review): this is a rendered diff — the two return lines below appear to be the before/after forms of the
// same statement (unqualified vs. this-qualified field access).
return textState;
return this.textState;
}

/**
* Creates a new noun mapping using the parameters without adding it to the state.
*
*
* @param words the words
* @param distribution the distribution of the mapping kinds
* @param referenceWords the reference words
Expand All @@ -62,7 +57,7 @@ public TextStateImpl getTextState() {
public NounMapping createNounMappingStateless(ImmutableSortedSet<Word> words, ImmutableSortedMap<MappingKind, Confidence> distribution,
ImmutableList<Word> referenceWords, ImmutableList<String> surfaceForms, String reference) {
if (reference == null) {
reference = calculateNounMappingReference(referenceWords);
reference = this.calculateNounMappingReference(referenceWords);
}

return new NounMappingImpl(words, distribution.toImmutable(), referenceWords, surfaceForms, reference);
Expand All @@ -71,7 +66,7 @@ public NounMapping createNounMappingStateless(ImmutableSortedSet<Word> words, Im
@Override
public ImmutableList<NounMapping> getNounMappingsWithSimilarReference(String reference) {
// Keeps those noun mappings of the text state whose reference the similarity utils of the global configuration
// judge similar to the given reference, and returns them as an immutable list.
return this.textState.getNounMappings()
// NOTE(review): this is a rendered diff — the two .select lines below appear to be the before/after forms of the
// same filter (unqualified vs. this-qualified globalConfiguration).
.select(nm -> globalConfiguration.getSimilarityUtils().areWordsSimilar(reference, nm.getReference()))
.select(nm -> this.globalConfiguration.getSimilarityUtils().areWordsSimilar(reference, nm.getReference()))
.toImmutable();
}

Expand All @@ -83,8 +78,8 @@ public NounMapping addNounMapping(ImmutableSortedSet<Word> words, ImmutableSorte
throw new IllegalArgumentException("Atleast 1 claimant is required");
}

NounMapping nounMapping = createNounMappingStateless(words, distribution, referenceWords, surfaceForms, reference);
getTextState().addNounMappingAddPhraseMapping(nounMapping);
NounMapping nounMapping = this.createNounMappingStateless(words, distribution, referenceWords, surfaceForms, reference);
this.getTextState().addNounMappingAddPhraseMapping(nounMapping);
return nounMapping;
}

Expand All @@ -94,39 +89,40 @@ public NounMapping addNounMapping(ImmutableSortedSet<Word> words, MappingKind ki
MutableSortedMap<MappingKind, Confidence> distribution = SortedMaps.mutable.empty();
distribution.put(MappingKind.NAME, new Confidence(DEFAULT_AGGREGATOR));
distribution.put(MappingKind.TYPE, new Confidence(DEFAULT_AGGREGATOR));
var nounMapping = createNounMappingStateless(words, distribution.toImmutable(), referenceWords, surfaceForms, reference);
var nounMapping = this.createNounMappingStateless(words, distribution.toImmutable(), referenceWords, surfaceForms, reference);
nounMapping.addKindWithProbability(kind, claimant, probability);
getTextState().addNounMappingAddPhraseMapping(nounMapping);
this.getTextState().addNounMappingAddPhraseMapping(nounMapping);
return nounMapping;
}

public NounMapping mergeNounMappings(NounMapping nounMapping, MutableList<NounMapping> nounMappingsToMerge, Claimant claimant) {
for (NounMapping nounMappingToMerge : nounMappingsToMerge) {

if (!textState.getNounMappings().contains(nounMappingToMerge)) {
if (!this.textState.getNounMappings().contains(nounMappingToMerge)) {

final NounMapping finalNounMappingToMerge = nounMappingToMerge;
var fittingNounMappings = textState.getNounMappings().select(nm -> nm.getWords().containsAllIterable(finalNounMappingToMerge.getWords()));
var fittingNounMappings = this.textState.getNounMappings().select(nm -> nm.getWords().containsAllIterable(finalNounMappingToMerge.getWords()));
if (fittingNounMappings.isEmpty()) {
continue;
} else if (fittingNounMappings.size() == 1) {
}
if (fittingNounMappings.size() == 1) {
nounMappingToMerge = fittingNounMappings.get(0);
} else {
throw new IllegalStateException();
}
}

assert textState.getNounMappings().contains(nounMappingToMerge);
assert this.textState.getNounMappings().contains(nounMappingToMerge);

var references = nounMapping.getReferenceWords().toList();
references.addAllIterable(nounMappingToMerge.getReferenceWords());
textState.mergeNounMappings(nounMapping, nounMappingToMerge, claimant, references.toImmutable());
this.textState.mergeNounMappings(nounMapping, nounMappingToMerge, claimant, references.toImmutable());

var mergedWords = SortedSets.mutable.empty();
mergedWords.addAllIterable(nounMapping.getWords());
mergedWords.addAllIterable(nounMappingToMerge.getWords());

var mergedNounMapping = textState.getNounMappings().select(nm -> nm.getWords().toSortedSet().equals(mergedWords));
var mergedNounMapping = this.textState.getNounMappings().select(nm -> nm.getWords().toSortedSet().equals(mergedWords));

assert (mergedNounMapping.size() == 1);

Expand All @@ -143,38 +139,4 @@ protected final Confidence putAllConfidencesTogether(Confidence confidence, Conf
return result;
}

/**
 * Registers a word under an abbreviation, reusing an existing entry where possible.
 * <p>
 * The abbreviations the text state returns for {@code word} are searched for one whose abbreviation string equals
 * {@code abbreviation}. If one is found it is extended with the word; otherwise a new {@link WordAbbreviation}
 * containing only this word is created and added to the text state.
 *
 * @param abbreviation the abbreviation string to look up or create
 * @param word         the word to associate with the abbreviation
 * @return the extended existing abbreviation, or the newly created one
 */
@Override
public WordAbbreviation addOrExtendWordAbbreviation(String abbreviation, Word word) {
    var existing = getTextState().getWordAbbreviations(word)
            .stream()
            .filter(candidate -> candidate.getAbbreviation().equals(abbreviation))
            .findFirst();

    // No matching entry yet: create one seeded with this word and register it in the state.
    if (!existing.isPresent()) {
        var created = new WordAbbreviation(abbreviation, new LinkedHashSet<>(List.of(word)));
        getTextState().addWordAbbreviation(created);
        return created;
    }

    return extendWordAbbreviation(existing.orElseThrow(), word);
}

/**
 * Adds a word to an existing word abbreviation.
 *
 * @param wordAbbreviation the abbreviation to extend; it is modified via {@code addWord}
 * @param word             the word to add
 * @return the same {@code wordAbbreviation} instance that was passed in
 */
protected WordAbbreviation extendWordAbbreviation(WordAbbreviation wordAbbreviation, Word word) {
    wordAbbreviation.addWord(word);
    return wordAbbreviation;
}

/**
 * Registers a phrase under an abbreviation, reusing an existing entry where possible.
 * <p>
 * The abbreviations the text state returns for {@code phrase} are searched for one whose abbreviation string equals
 * {@code abbreviation}. If one is found it is extended with the phrase; otherwise a new {@link PhraseAbbreviation}
 * containing only this phrase is created and added to the text state.
 *
 * @param abbreviation the abbreviation string to look up or create
 * @param phrase       the phrase to associate with the abbreviation
 * @return the extended existing abbreviation, or the newly created one
 */
@Override
public PhraseAbbreviation addOrExtendPhraseAbbreviation(String abbreviation, Phrase phrase) {
    var existing = getTextState().getPhraseAbbreviations(phrase)
            .stream()
            .filter(candidate -> candidate.getAbbreviation().equals(abbreviation))
            .findFirst();

    // No matching entry yet: create one seeded with this phrase and register it in the state.
    if (!existing.isPresent()) {
        var created = new PhraseAbbreviation(abbreviation, new LinkedHashSet<>(List.of(phrase)));
        getTextState().addPhraseAbbreviation(created);
        return created;
    }

    return extendPhraseAbbreviation(existing.orElseThrow(), phrase);
}

/**
 * Adds a phrase to an existing phrase abbreviation.
 *
 * @param phraseAbbreviation the abbreviation to extend; it is modified via {@code addPhrase}
 * @param phrase             the phrase to add
 * @return the same {@code phraseAbbreviation} instance that was passed in
 */
protected PhraseAbbreviation extendPhraseAbbreviation(PhraseAbbreviation phraseAbbreviation, Phrase phrase) {
    phraseAbbreviation.addPhrase(phrase);
    return phraseAbbreviation;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import edu.kit.kastel.mcse.ardoco.core.api.stage.textextraction.TextStateStrategy;
import edu.kit.kastel.mcse.ardoco.core.data.DataRepository;
import edu.kit.kastel.mcse.ardoco.core.pipeline.AbstractExecutionStage;
import edu.kit.kastel.mcse.ardoco.tlr.textextraction.agents.AbbreviationAgent;
import edu.kit.kastel.mcse.ardoco.tlr.textextraction.agents.InitialTextAgent;
import edu.kit.kastel.mcse.ardoco.tlr.textextraction.agents.PhraseAgent;

Expand All @@ -21,10 +20,7 @@ public class TextExtraction extends AbstractExecutionStage {
* Instantiates a new text extractor.
*/
public TextExtraction(DataRepository dataRepository) {
// Hands the superclass the list of agents for this stage, the stage id "TextExtraction", and the data repository.
// NOTE(review): this is a rendered diff — the multi-line super(...) call (which includes AbbreviationAgent) and the
// single-line super(...) call (which does not) appear to be the before/after forms of the same statement.
super(List.of(//
new InitialTextAgent(dataRepository),//
new PhraseAgent(dataRepository),//
new AbbreviationAgent(dataRepository)), "TextExtraction", dataRepository);
super(List.of(new InitialTextAgent(dataRepository), new PhraseAgent(dataRepository)), "TextExtraction", dataRepository);
}

/**
Expand All @@ -42,7 +38,7 @@ public static TextExtraction get(SortedMap<String, String> additionalConfigs, Da

@Override
protected void initializeState() {
var dataRepository = getDataRepository();
var dataRepository = this.getDataRepository();
var optionalTextState = dataRepository.getData(TextState.ID, TextStateImpl.class);
if (optionalTextState.isEmpty()) {
TextStateStrategy tts = new OriginalTextStateStrategy(dataRepository.getGlobalConfiguration());
Expand Down
Loading

0 comments on commit 8fb4a7d

Please sign in to comment.