Skip to content

Commit

Permalink
Move packages for similarity
Browse files Browse the repository at this point in the history
  • Loading branch information
dfuchss committed Nov 29, 2024
1 parent 59a6567 commit 3e74f00
Show file tree
Hide file tree
Showing 42 changed files with 114 additions and 98 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util;
package edu.kit.kastel.mcse.ardoco.core.common.similarity;

import static edu.kit.kastel.mcse.ardoco.core.common.util.SimilarityUtils.cosineSimilarity;
import static edu.kit.kastel.mcse.ardoco.core.common.util.SimilarityUtils.uniqueDot;
import static edu.kit.kastel.mcse.ardoco.core.common.similarity.SimilarityUtils.cosineSimilarity;
import static edu.kit.kastel.mcse.ardoco.core.common.similarity.SimilarityUtils.uniqueDot;

import java.util.function.ToDoubleBiFunction;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util;
package edu.kit.kastel.mcse.ardoco.core.common.similarity;

import java.util.Collection;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2021-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util;
package edu.kit.kastel.mcse.ardoco.core.common.similarity;

import java.io.Serializable;
import java.util.ArrayList;
Expand All @@ -21,8 +21,10 @@
import edu.kit.kastel.mcse.ardoco.core.api.text.PhraseType;
import edu.kit.kastel.mcse.ardoco.core.api.text.Word;
import edu.kit.kastel.mcse.ardoco.core.architecture.Deterministic;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimUtils;
import edu.kit.kastel.mcse.ardoco.core.common.tuple.Pair;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimUtils;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonTextToolsConfig;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;

/**
* This class is a utility class.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.util.Objects;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.io.BufferedReader;
import java.io.File;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.io.Serializable;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.io.Serializable;
import java.util.function.BiFunction;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.util.Objects;
import java.util.stream.IntStream;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2022-2023. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.util.ArrayList;

Expand All @@ -8,12 +8,12 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.equality.EqualityMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.jarowinkler.JaroWinklerMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.levenshtein.LevenshteinMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.ngram.NgramMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.sewordsim.SEWordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonTextToolsConfig;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.equality.EqualityMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.jarowinkler.JaroWinklerMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.levenshtein.LevenshteinMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.ngram.NgramMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.sewordsim.SEWordSimMeasure;

/**
* Responsible for loading the word similarity measures that should be enabled according to the
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.io.Serializable;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim;

import java.io.Serializable;
import java.util.ArrayList;
Expand All @@ -13,10 +13,10 @@
import org.sqlite.SQLiteOpenMode;

import edu.kit.kastel.mcse.ardoco.core.api.text.Word;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.equality.EqualityMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy.AverageStrategy;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy.ComparisonStrategy;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy.SimilarityStrategy;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.equality.EqualityMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy.AverageStrategy;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy.ComparisonStrategy;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy.SimilarityStrategy;

/**
* A static class that provides various utility methods to calculate similarity between different kinds of objects. This class statically keeps a reference to a
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.equality;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.equality;

import java.util.Locale;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacterSequence;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacterSequence;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

/**
* This word similarity measure just checks whether the most appropriate string representations of the passed objects are equal.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.glove;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.glove;

import java.nio.file.Path;
import java.sql.SQLException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.vector.RetrieveVectorException;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.vector.VectorBasedWordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.vector.VectorSqliteDatabase;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.vector.WordVectorDataSource;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonTextToolsConfig;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.vector.RetrieveVectorException;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.vector.VectorBasedWordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.vector.VectorSqliteDatabase;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.vector.WordVectorDataSource;

/**
* This word similarity measure utilizes GloVe trained word vector representations to calculate word similarity. It retrieves vectors for each word and
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.jarowinkler;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.jarowinkler;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonTextToolsConfig;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;

/**
* This word similarity measure uses the Jaro-Winkler algorithm to calculate similarity.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.jarowinkler;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.jarowinkler;

import java.io.Serializable;
import java.util.Arrays;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacter;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacterMatchFunctions;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacterSequence;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacter;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacterMatchFunctions;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacterSequence;

/**
* A similarity algorithm indicating the percentage of matched characters between two character sequences.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.levenshtein;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.levenshtein;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacterSequence;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonTextToolsConfig;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacterSequence;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;

/**
* This word similarity measure uses the Levenshtein distance (also sometimes called edit distance) algorithm to calculate word similarity. This measure is
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.levenshtein;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.levenshtein;

import java.io.Serializable;
import java.util.Arrays;

import org.apache.commons.text.similarity.LevenshteinDistance;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacter;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacterMatchFunctions;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.UnicodeCharacterSequence;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacter;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacterMatchFunctions;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.UnicodeCharacterSequence;

/**
* An algorithm for measuring the difference between two character sequences.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.ngram;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.ngram;

import java.util.Objects;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonTextToolsConfig;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;

/**
* This word similarity measure uses the N-gram word distance function defined by Kondrak 2005.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2022-2023. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.sewordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.sewordsim;

import java.nio.file.Files;
import java.nio.file.Path;
Expand All @@ -13,7 +13,7 @@

import org.sqlite.SQLiteConfig;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimUtils;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimUtils;
import opennlp.tools.stemmer.PorterStemmer;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.measures.sewordsim;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.measures.sewordsim;

import java.nio.file.Path;
import java.sql.SQLException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.util.CommonTextToolsConfig;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;

/**
* This word similarity measure utilizes the SEWordSim database from Tian et al. 2014
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/* Licensed under MIT 2022-2023. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

/**
* This comparison strategy accepts any word pair as similar if at least one of the specified word similarity measures
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

public class AverageStrategy implements SimilarityStrategy {
@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
/* Licensed under MIT 2022-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.io.Serializable;
import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

/**
* A comparison strategy determines how the verdicts of multiple WSMs regarding a specific comparison are combined.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/* Licensed under MIT 2022-2023. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

/**
* This comparison strategy accepts any word pair as similar if the majority of specified word similarity measures
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

public class MaximumStrategy implements SimilarityStrategy {
@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.util.ArrayList;
import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

public class MedianStrategy implements SimilarityStrategy {
@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.strategy;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.strategy;

import java.io.Serializable;
import java.util.List;

import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.WordSimMeasure;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.ComparisonContext;
import edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.WordSimMeasure;

public interface SimilarityStrategy extends Serializable {
/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Licensed under MIT 2022-2023. */
package edu.kit.kastel.mcse.ardoco.core.common.util.wordsim.vector;
package edu.kit.kastel.mcse.ardoco.core.common.similarity.wordsim.vector;

/**
* An exception that can occur while trying to retrieve a vector.
Expand Down
Loading

0 comments on commit 3e74f00

Please sign in to comment.