diff --git a/06-web-api/full-text-search.sln b/06-web-api/full-text-search.sln new file mode 100644 index 0000000..2afceb5 --- /dev/null +++ b/06-web-api/full-text-search.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DocumentManagement", "src\DocumentManagement\DocumentManagement.csproj", "{D471FD38-26BA-4DEF-96A2-982F235AEA01}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "test", "test\test.csproj", "{FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MyWebApplication", "src\MyWebApplication\MyWebApplication.csproj", "{FD4AE171-A581-4FD2-899C-C1BB5EAFE641}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D471FD38-26BA-4DEF-96A2-982F235AEA01}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D471FD38-26BA-4DEF-96A2-982F235AEA01}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D471FD38-26BA-4DEF-96A2-982F235AEA01}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D471FD38-26BA-4DEF-96A2-982F235AEA01}.Release|Any CPU.Build.0 = Release|Any CPU + {FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Release|Any CPU.ActiveCfg = Release|Any CPU + {FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Release|Any CPU.Build.0 = Release|Any CPU + {FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Release|Any CPU.ActiveCfg = Release|Any CPU + 
{FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/06-web-api/src/DocumentManagement/DocumentManagement.csproj b/06-web-api/src/DocumentManagement/DocumentManagement.csproj
new file mode 100644
index 0000000..3eee87d
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/DocumentManagement.csproj
@@ -0,0 +1,10 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+    <PropertyGroup>
+        <TargetFramework>net8.0</TargetFramework>
+        <ImplicitUsings>enable</ImplicitUsings>
+        <Nullable>enable</Nullable>
+        <RootNamespace>Mohaymen.FullTextSearch.DocumentManagement</RootNamespace>
+    </PropertyGroup>
+
+</Project>
diff --git a/06-web-api/src/DocumentManagement/Interfaces/IFileReader.cs b/06-web-api/src/DocumentManagement/Interfaces/IFileReader.cs
new file mode 100644
index 0000000..6e03327
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Interfaces/IFileReader.cs
@@ -0,0 +1,10 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+/// <summary>Reads the files of a folder into a <see cref="FileCollection"/>.</summary>
+public interface IFileReader
+{
+    /// <summary>Reads the files located in <paramref name="folderPath"/>.</summary>
+    FileCollection ReadAllFiles(string folderPath);
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Interfaces/IInvertedIndex.cs b/06-web-api/src/DocumentManagement/Interfaces/IInvertedIndex.cs
new file mode 100644
index 0000000..10df553
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Interfaces/IInvertedIndex.cs
@@ -0,0 +1,13 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+/// <summary>Maps keywords to the documents that contain them.</summary>
+public interface IInvertedIndex
+{
+    /// <summary>All documents known to the index.</summary>
+    HashSet<string> AllDocuments { get; }
+
+    /// <summary>Returns the documents associated with <paramref name="keyword"/>.</summary>
+    HashSet<string> GetDocumentsByKeyword(Keyword keyword);
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Interfaces/IInvertedIndexBuilder.cs b/06-web-api/src/DocumentManagement/Interfaces/IInvertedIndexBuilder.cs
new file mode 100644
index 0000000..b2c08af
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Interfaces/IInvertedIndexBuilder.cs
@@ -0,0 +1,13 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+/// <summary>Builds an <see cref="IInvertedIndex"/> from file contents.</summary>
+public interface IInvertedIndexBuilder
+{
+    /// <summary>Returns the index built so far.</summary>
+    IInvertedIndex Build();
+
+    /// <summary>Indexes the words of every file in <paramref name="fileCollection"/> and returns the builder for chaining.</summary>
+    IInvertedIndexBuilder IndexFilesWords(FileCollection fileCollection);
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Interfaces/ISearchStrategy.cs b/06-web-api/src/DocumentManagement/Interfaces/ISearchStrategy.cs
new file mode 100644
index 0000000..5c05e85
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Interfaces/ISearchStrategy.cs
@@ -0,0 +1,10 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+/// <summary>One rule for narrowing a set of candidate documents by keywords.</summary>
+public interface ISearchStrategy
+{
+    /// <summary>Filters <paramref name="documents"/> in place using <paramref name="keywords"/> and <paramref name="invertedIndex"/>.</summary>
+    void FilterDocuments(HashSet<string> documents, List<Keyword> keywords, IInvertedIndex invertedIndex);
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Interfaces/ISearcher.cs b/06-web-api/src/DocumentManagement/Interfaces/ISearcher.cs
new file mode 100644
index 0000000..7f4f87a
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Interfaces/ISearcher.cs
@@ -0,0 +1,9 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+/// <summary>Runs search queries over indexed documents.</summary>
+public interface ISearcher
+{
+    /// <summary>Applies every query in <paramref name="query"/> and returns the documents that remain.</summary>
+    ICollection<string> Search(List<SearchQuery> query);
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Interfaces/ITokenizer.cs b/06-web-api/src/DocumentManagement/Interfaces/ITokenizer.cs
new file mode 100644
index 0000000..493476c
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Interfaces/ITokenizer.cs
@@ -0,0 +1,10 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+/// <summary>Turns raw text into keywords.</summary>
+public interface ITokenizer
+{
+    /// <summary>Extracts the keywords of <paramref name="text"/> in order of appearance.</summary>
+    List<Keyword> ExtractKeywords(string text);
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Models/AdvancedInvertedIndex.cs
b/06-web-api/src/DocumentManagement/Models/AdvancedInvertedIndex.cs
new file mode 100644
index 0000000..b48cd46
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Models/AdvancedInvertedIndex.cs
@@ -0,0 +1,105 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+/// <summary>
+/// Positional inverted index: each keyword maps to the (document, position) pairs at which it
+/// occurs, so <see cref="GetDocumentsByKeyword"/> can match multi-word phrases.
+/// </summary>
+public class AdvancedInvertedIndex : IInvertedIndex, IEquatable<AdvancedInvertedIndex>
+{
+    private readonly Dictionary<Keyword, HashSet<KeywordInfo>> _invertedIndexMap = [];
+    private readonly ITokenizer _tokenizer;
+
+    public AdvancedInvertedIndex(ITokenizer tokenizer)
+    {
+        _tokenizer = tokenizer;
+    }
+
+    /// <summary>Every document added through <see cref="AddDocumentToKeyword"/>.</summary>
+    public HashSet<string> AllDocuments { get; } = [];
+
+    /// <summary>Records one occurrence of <paramref name="keyword"/>.</summary>
+    public void AddDocumentToKeyword(Keyword keyword, KeywordInfo keywordInfo)
+    {
+        AllDocuments.Add(keywordInfo.Document);
+
+        // One dictionary lookup instead of ContainsKey + Add + indexer.
+        if (!_invertedIndexMap.TryGetValue(keyword, out var occurrences))
+        {
+            occurrences = [];
+            _invertedIndexMap.Add(keyword, occurrences);
+        }
+
+        occurrences.Add(keywordInfo);
+    }
+
+    /// <summary>
+    /// Tokenizes <paramref name="phrase"/> and returns the documents in which its words occur at
+    /// consecutive positions; a single word is a plain keyword lookup.
+    /// </summary>
+    /// <returns>Matching documents; empty when the phrase has no words or a word is not indexed.</returns>
+    public HashSet<string> GetDocumentsByKeyword(Keyword phrase)
+    {
+        var phraseWords = _tokenizer.ExtractKeywords(phrase.Word);
+
+        if (phraseWords.Count == 0
+            || !_invertedIndexMap.TryGetValue(phraseWords[0], out var firstOccurrences))
+        {
+            return [];
+        }
+
+        // Occurrences of the last word of the phrase prefix matched so far.
+        var keywordInfos = new HashSet<KeywordInfo>(firstOccurrences);
+
+        for (var i = 1; i < phraseWords.Count; i++)
+        {
+            if (!_invertedIndexMap.TryGetValue(phraseWords[i], out var wordOccurrences))
+            {
+                return [];
+            }
+
+            // Keep an occurrence only if the previous phrase word sits right before it.
+            var previous = keywordInfos;
+            keywordInfos = wordOccurrences
+                .Where(info => previous.Contains(new KeywordInfo(info.Document, info.Position - 1)))
+                .ToHashSet();
+        }
+
+        return keywordInfos.Select(keywordInfo => keywordInfo.Document).ToHashSet();
+    }
+
+    public bool Equals(AdvancedInvertedIndex? other)
+    {
+        if (other is null) return false;
+
+        if (_invertedIndexMap.Count != other._invertedIndexMap.Count)
+            return false;
+
+        foreach (var kvp in _invertedIndexMap)
+        {
+            if (!other._invertedIndexMap.TryGetValue(kvp.Key, out var otherSet))
+                return false;
+
+            if (!kvp.Value.SetEquals(otherSet))
+                return false;
+        }
+
+        return AllDocuments.SetEquals(other.AllDocuments);
+    }
+
+    public override bool Equals(object? obj)
+    {
+        if (ReferenceEquals(null, obj)) return false;
+        if (ReferenceEquals(this, obj)) return true;
+        if (obj.GetType() != this.GetType()) return false;
+        return Equals((AdvancedInvertedIndex)obj);
+    }
+
+    public override int GetHashCode()
+    {
+        // Must agree with the content-based Equals: hashing the collection references gave two
+        // equal indexes different hash codes. Equal indexes always have equal counts.
+        return HashCode.Combine(_invertedIndexMap.Count, AllDocuments.Count);
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Models/FileCollection.cs b/06-web-api/src/DocumentManagement/Models/FileCollection.cs
new file mode 100644
index 0000000..1c69390
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Models/FileCollection.cs
@@ -0,0 +1,22 @@
+namespace Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+/// <summary>In-memory set of files, keyed by file path, holding each file's text content.</summary>
+public class FileCollection
+{
+    private readonly Dictionary<string, string> _filesDataDictionary = [];
+
+    public void AddFile(string filePath, string fileContent) =>
+        _filesDataDictionary.Add(filePath, fileContent);
+
+    public List<string> GetFilesPath() =>
+        _filesDataDictionary.Keys.ToList();
+
+    public string GetFileContent(string filePath) =>
+        _filesDataDictionary[filePath];
+
+    public bool ContainsFile(string filePath) =>
+        _filesDataDictionary.ContainsKey(filePath);
+
+    public int FilesCount() =>
+        _filesDataDictionary.Count;
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Models/InvertedIndex.cs b/06-web-api/src/DocumentManagement/Models/InvertedIndex.cs
new file mode 100644
index 0000000..e8d5aaf
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Models/InvertedIndex.cs
@@ -0,0 +1,26 @@
+using
Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Models;
+// Plain inverted index: maps each keyword to the set of documents that contain it.
+public class InvertedIndex : IInvertedIndex
+{
+    private readonly Dictionary<Keyword, HashSet<string>> _invertedIndexMap = [];
+    public HashSet<string> AllDocuments { get; } = [];
+
+    public void AddDocumentToKeyword(Keyword keyword, string document)
+    {
+        AllDocuments.Add(document);
+
+        if (!_invertedIndexMap.ContainsKey(keyword))
+            _invertedIndexMap.Add(keyword, []);
+
+        _invertedIndexMap[keyword].Add(document);
+    }
+
+    public HashSet<string> GetDocumentsByKeyword(Keyword keyword)
+    {
+        _invertedIndexMap.TryGetValue(keyword, out HashSet<string>? documents);
+
+        return documents ?? [];
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Models/Keyword.cs b/06-web-api/src/DocumentManagement/Models/Keyword.cs
new file mode 100644
index 0000000..3a7cb96
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Models/Keyword.cs
@@ -0,0 +1,14 @@
+namespace Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+/// <summary>A search term, normalized to upper case so lookups are case-insensitive.</summary>
+public record Keyword
+{
+    public Keyword(string word)
+    {
+        // Invariant casing: culture-sensitive ToUpper() would normalize the same text differently
+        // depending on the current culture (e.g. the Turkish dotted/dotless i).
+        Word = word.ToUpperInvariant();
+    }
+
+    public string Word { get; init; }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Models/KeywordInfo.cs b/06-web-api/src/DocumentManagement/Models/KeywordInfo.cs
new file mode 100644
index 0000000..5cad16b
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Models/KeywordInfo.cs
@@ -0,0 +1,4 @@
+namespace Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+/// <summary>One occurrence of a keyword: the document and the keyword's position in it.</summary>
+public record KeywordInfo(string Document, int Position);
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Models/SearchQuery.cs b/06-web-api/src/DocumentManagement/Models/SearchQuery.cs
new file mode 100644
index 0000000..c95b864
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Models/SearchQuery.cs
@@ -0,0 +1,6 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+/// <summary>A group of keywords together with the strategy that decides how they filter documents.</summary>
+public record SearchQuery(ISearchStrategy SearchStrategy, List<Keyword> Keywords);
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Services/Files/FileReader.cs b/06-web-api/src/DocumentManagement/Services/Files/FileReader.cs
new file mode 100644
index 0000000..4d076df
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Services/Files/FileReader.cs
@@ -0,0 +1,23 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+namespace Mohaymen.FullTextSearch.DocumentManagement.Services.FilesService;
+
+/// <summary>Reads files from disk into a <see cref="FileCollection"/>.</summary>
+public class FileReader : IFileReader
+{
+    /// <summary>Reads the text of every file returned by Directory.GetFiles for <paramref name="folderPath"/>.</summary>
+    public FileCollection ReadAllFiles(string folderPath)
+    {
+        var fileCollection = new FileCollection();
+
+        foreach (var filePath in Directory.GetFiles(folderPath))
+        {
+            if (!fileCollection.ContainsFile(filePath))
+            {
+                fileCollection.AddFile(filePath, File.ReadAllText(filePath));
+            }
+        }
+
+        return fileCollection;
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Services/InvertedIndex/FilesAdvancedInvertedIndexBuilder.cs b/06-web-api/src/DocumentManagement/Services/InvertedIndex/FilesAdvancedInvertedIndexBuilder.cs
new file mode 100644
index 0000000..01f67c5
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Services/InvertedIndex/FilesAdvancedInvertedIndexBuilder.cs
@@ -0,0 +1,42 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;
+
+/// <summary>Builds an <see cref="AdvancedInvertedIndex"/> from file contents, recording each keyword's position.</summary>
+public class FilesAdvancedInvertedIndexBuilder : IInvertedIndexBuilder
+{
+    private readonly AdvancedInvertedIndex _advancedInvertedIndex;
+    private readonly ITokenizer _tokenizer;
+
+    public FilesAdvancedInvertedIndexBuilder(ITokenizer tokenizer)
+    {
+        _advancedInvertedIndex = new(tokenizer);
+        _tokenizer = tokenizer;
+    }
+
+    public IInvertedIndexBuilder IndexFilesWords(FileCollection fileCollection)
+    {
+        foreach (var filePath in fileCollection.GetFilesPath())
+        {
+            var keywords = _tokenizer.ExtractKeywords(fileCollection.GetFileContent(filePath));
+            UpdateInvertedIndexMap(keywords, filePath);
+        }
+
+        return this;
+    }
+
+    private void UpdateInvertedIndexMap(List<Keyword> keywords, string filePath)
+    {
+        // The keyword's ordinal within the file is stored as its position.
+        for (var i = 0; i < keywords.Count; i++)
+        {
+            _advancedInvertedIndex.AddDocumentToKeyword(keywords[i], new KeywordInfo(filePath, i));
+        }
+    }
+
+    public IInvertedIndex Build()
+    {
+        return _advancedInvertedIndex;
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Services/InvertedIndex/FilesInvertedIndexBuilder.cs b/06-web-api/src/DocumentManagement/Services/InvertedIndex/FilesInvertedIndexBuilder.cs
new file mode 100644
index 0000000..7c54efa
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Services/InvertedIndex/FilesInvertedIndexBuilder.cs
@@ -0,0 +1,41 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+using Mohaymen.FullTextSearch.DocumentManagement.Utilities;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;
+
+/// <summary>Builds a plain <see cref="InvertedIndex"/> (keyword to documents) from file contents.</summary>
+public class FilesInvertedIndexBuilder : IInvertedIndexBuilder
+{
+    private readonly InvertedIndex _invertedIndex = new();
+    private readonly ITokenizer _tokenizer;
+
+    public FilesInvertedIndexBuilder(ITokenizer tokenizer)
+    {
+        _tokenizer = tokenizer;
+    }
+
+    public IInvertedIndexBuilder IndexFilesWords(FileCollection fileCollection)
+    {
+        foreach (var filePath in fileCollection.GetFilesPath())
+        {
+            var keywords = _tokenizer.ExtractKeywords(fileCollection.GetFileContent(filePath));
+            UpdateInvertedIndexMap(keywords, filePath);
+        }
+
+        return this;
+    }
+
+    private void UpdateInvertedIndexMap(List<Keyword> keywords, string filePath)
+    {
+        foreach (var keyword in keywords)
+        {
+            _invertedIndex.AddDocumentToKeyword(keyword, filePath);
+        }
+    }
+
+    public IInvertedIndex Build()
+    {
+        return _invertedIndex;
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Services/InvertedIndex/InvertedIndexSearcher.cs b/06-web-api/src/DocumentManagement/Services/InvertedIndex/InvertedIndexSearcher.cs
new file mode 100644
index 0000000..c44c25c
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Services/InvertedIndex/InvertedIndexSearcher.cs
@@ -0,0 +1,22 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;
+
+/// <summary>Searches an <see cref="IInvertedIndex"/> by applying search strategies in sequence.</summary>
+public class InvertedIndexSearcher(IInvertedIndex invertedIndex) : ISearcher
+{
+    /// <summary>Starts from every indexed document and lets each query's strategy narrow the set.</summary>
+    public ICollection<string> Search(List<SearchQuery> queries)
+    {
+        // Work on a copy so the strategies never mutate the index's own document set.
+        var filteredDocuments = new HashSet<string>(invertedIndex.AllDocuments);
+
+        foreach (var (searchStrategy, keywords) in queries)
+        {
+            searchStrategy.FilterDocuments(filteredDocuments, keywords, invertedIndex);
+        }
+
+        return filteredDocuments;
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/ExcludedSearchStrategy.cs b/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/ExcludedSearchStrategy.cs
new file mode 100644
index 0000000..9550676
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/ExcludedSearchStrategy.cs
@@ -0,0 +1,17 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService.SearchStrategies;
+
+/// <summary>Removes every document that contains any of the given keywords.</summary>
+public class ExcludedSearchStrategy : ISearchStrategy
+{
+    public void FilterDocuments(HashSet<string> documents, List<Keyword> keywords, IInvertedIndex invertedIndex)
+    {
+        foreach (var keyword in keywords)
+        {
+            HashSet<string> currentFiles = invertedIndex.GetDocumentsByKeyword(keyword);
+            documents.ExceptWith(currentFiles);
+        }
+    }
+}
\ No newline
at end of file
diff --git a/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/MandatorySearchStrategy.cs b/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/MandatorySearchStrategy.cs
new file mode 100644
index 0000000..5354c8a
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/MandatorySearchStrategy.cs
@@ -0,0 +1,17 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService.SearchStrategies;
+
+/// <summary>Keeps only the documents that contain every one of the given keywords.</summary>
+public class MandatorySearchStrategy : ISearchStrategy
+{
+    public void FilterDocuments(HashSet<string> documents, List<Keyword> keywords, IInvertedIndex invertedIndex)
+    {
+        foreach (var keyword in keywords)
+        {
+            HashSet<string> currentFiles = invertedIndex.GetDocumentsByKeyword(keyword);
+            documents.IntersectWith(currentFiles);
+        }
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/OptionalSearchStrategy.cs b/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/OptionalSearchStrategy.cs
new file mode 100644
index 0000000..f34d067
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Services/InvertedIndex/SearchStrategies/OptionalSearchStrategy.cs
@@ -0,0 +1,25 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService.SearchStrategies;
+
+/// <summary>Keeps only the documents that contain at least one of the given keywords.</summary>
+public class OptionalSearchStrategy : ISearchStrategy
+{
+    public void FilterDocuments(HashSet<string> documents, List<Keyword> keywords, IInvertedIndex invertedIndex)
+    {
+        // No optional keywords means no restriction at all.
+        if (keywords.Count == 0)
+        {
+            return;
+        }
+
+        var optionalsSet = new HashSet<string>();
+        foreach (var keyword in keywords)
+        {
+            optionalsSet.UnionWith(invertedIndex.GetDocumentsByKeyword(keyword));
+        }
+
+        documents.IntersectWith(optionalsSet);
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/DocumentManagement/Utilities/Tokenizer.cs b/06-web-api/src/DocumentManagement/Utilities/Tokenizer.cs
new file mode 100644
index 0000000..19e2e9b
--- /dev/null
+++ b/06-web-api/src/DocumentManagement/Utilities/Tokenizer.cs
@@ -0,0 +1,20 @@
+using System.Text.RegularExpressions;
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+
+namespace Mohaymen.FullTextSearch.DocumentManagement.Utilities;
+
+/// <summary>Splits text into keywords on every match of the separator pattern, dropping blank pieces.</summary>
+public class Tokenizer(string splitRegex = @"[^\w']+") : ITokenizer
+{
+    // Built once per tokenizer instead of resolving the pattern on every call.
+    private readonly Regex _splitter = new(splitRegex);
+
+    public List<Keyword> ExtractKeywords(string text)
+    {
+        return _splitter.Split(text)
+            .Where(word => !string.IsNullOrWhiteSpace(word))
+            .Select(word => new Keyword(word))
+            .ToList();
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/MyWebApplication/Controllers/InvertedIndexController.cs b/06-web-api/src/MyWebApplication/Controllers/InvertedIndexController.cs
new file mode 100644
index 0000000..94aeec1
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/Controllers/InvertedIndexController.cs
@@ -0,0 +1,30 @@
+using Microsoft.AspNetCore.Mvc;
+using Mohaymen.FullTextSearch.MyWebApplication.Helpers;
+using Mohaymen.FullTextSearch.MyWebApplication.Interfaces;
+
+namespace Mohaymen.FullTextSearch.MyWebApplication.Controllers;
+
+/// <summary>HTTP entry point for searching the indexed documents.</summary>
+[ApiController]
+[Route("[controller]")]
+public class InvertedIndexController : ControllerBase
+{
+    private readonly IApplicationService _applicationService;
+
+    public InvertedIndexController(IApplicationService applicationService)
+    {
+        _applicationService = applicationService;
+    }
+
+    /// <summary>Runs a search with the mandatory, optional and excluded words bound from the query string.</summary>
+    [HttpGet]
+    public IActionResult GetAll([FromQuery]QueryObject queryObject)
+    {
+        var documents = _applicationService.Search(
+            queryObject.MandatoryWords,
+            queryObject.OptionalWords,
+            queryObject.ExcludedWords);
+
+        return Ok(documents);
+    }
+}
\ No newline at end of file
diff --git
a/06-web-api/src/MyWebApplication/Helpers/QueryObject.cs b/06-web-api/src/MyWebApplication/Helpers/QueryObject.cs
new file mode 100644
index 0000000..6ed6700
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/Helpers/QueryObject.cs
@@ -0,0 +1,9 @@
+namespace Mohaymen.FullTextSearch.MyWebApplication.Helpers;
+
+/// <summary>Query-string model for a search request; every list defaults to empty.</summary>
+public class QueryObject
+{
+    public List<string> MandatoryWords { get; set; } = new();
+    public List<string> ExcludedWords { get; set; } = new();
+    public List<string> OptionalWords { get; set; } = new();
+}
\ No newline at end of file
diff --git a/06-web-api/src/MyWebApplication/Interfaces/IApplicationService.cs b/06-web-api/src/MyWebApplication/Interfaces/IApplicationService.cs
new file mode 100644
index 0000000..5d2d542
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/Interfaces/IApplicationService.cs
@@ -0,0 +1,8 @@
+namespace Mohaymen.FullTextSearch.MyWebApplication.Interfaces;
+
+/// <summary>Application-level search over the indexed documents.</summary>
+public interface IApplicationService
+{
+    /// <summary>Returns the names of the documents matching the mandatory, optional and excluded words.</summary>
+    IEnumerable<string> Search(List<string> mandatoryWords, List<string> optionalWords, List<string> excludedWords);
+}
\ No newline at end of file
diff --git a/06-web-api/src/MyWebApplication/MyWebApplication.csproj b/06-web-api/src/MyWebApplication/MyWebApplication.csproj
new file mode 100644
index 0000000..6dd106d
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/MyWebApplication.csproj
@@ -0,0 +1,33 @@
+
+
+
+    net8.0
+    enable
+    enable
+    Mohaymen.FullTextSearch.MyWebApplication
+
+
+
+
+
+
+
+
+
+
+
+
+
+    ResXFileCodeGenerator
+    Resources.Designer.cs
+
+
+
+
+
+    PreserveNewest
+
+
+
+
diff --git a/06-web-api/src/MyWebApplication/Program.cs b/06-web-api/src/MyWebApplication/Program.cs
new file mode 100644
index 0000000..494fda7
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/Program.cs
@@ -0,0 +1,46 @@
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Services.FilesService;
+using Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;
+using Mohaymen.FullTextSearch.DocumentManagement.Utilities;
+using Mohaymen.FullTextSearch.MyWebApplication.Interfaces;
+using Mohaymen.FullTextSearch.MyWebApplication.Services;
+
+namespace Mohaymen.FullTextSearch.MyWebApplication;
+
+public class Program
+{
+    public static void Main(string[] args)
+    {
+        var builder = WebApplication.CreateBuilder(args);
+
+        // Add services to the container.
+        builder.Services.AddAuthorization();
+        builder.Services.AddControllers();
+
+        // Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
+        builder.Services.AddEndpointsApiExplorer();
+        builder.Services.AddSwaggerGen();
+        // NOTE(review): the generic arguments of these four registrations were missing; they are
+        // reconstructed from ApplicationService's constructor dependencies. Confirm which
+        // IInvertedIndexBuilder implementation (advanced/positional or plain) is intended.
+        builder.Services.AddSingleton<IFileReader, FileReader>();
+        builder.Services.AddSingleton<ITokenizer, Tokenizer>();
+        builder.Services.AddSingleton<IInvertedIndexBuilder, FilesAdvancedInvertedIndexBuilder>();
+        builder.Services.AddSingleton<IApplicationService, ApplicationService>();
+
+        var app = builder.Build();
+
+        // Configure the HTTP request pipeline.
+        if (app.Environment.IsDevelopment())
+        {
+            app.UseSwagger();
+            app.UseSwaggerUI();
+        }
+
+        app.MapControllers();
+        app.UseHttpsRedirection();
+        app.UseAuthorization();
+
+        app.Run();
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/MyWebApplication/Properties/launchSettings.json b/06-web-api/src/MyWebApplication/Properties/launchSettings.json
new file mode 100644
index 0000000..93ed998
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/Properties/launchSettings.json
@@ -0,0 +1,41 @@
+{
+  "$schema": "http://json.schemastore.org/launchsettings.json",
+  "iisSettings": {
+    "windowsAuthentication": false,
+    "anonymousAuthentication": true,
+    "iisExpress": {
+      "applicationUrl": "http://localhost:18722",
+      "sslPort": 44326
+    }
+  },
+  "profiles": {
+    "http": {
+      "commandName": "Project",
+      "dotnetRunMessages": true,
+      "launchBrowser": true,
+      "launchUrl": "swagger",
+      "applicationUrl": "http://localhost:5113",
+      "environmentVariables": {
+        "ASPNETCORE_ENVIRONMENT": "Development"
+      }
+    },
+    "https": {
+      "commandName": "Project",
+      "dotnetRunMessages": true,
+      "launchBrowser": true,
+      "launchUrl": "swagger",
+      "applicationUrl": "https://localhost:7100;http://localhost:5113",
+      "environmentVariables": {
+        "ASPNETCORE_ENVIRONMENT": "Development"
+      }
+    },
+    "IIS Express": {
+      "commandName": "IISExpress",
+      "launchBrowser": true,
+      "launchUrl": "swagger",
+      "environmentVariables": {
+        "ASPNETCORE_ENVIRONMENT": "Development"
+      }
+    }
+  }
+}
diff --git a/06-web-api/src/MyWebApplication/Services/ApplicationService.cs b/06-web-api/src/MyWebApplication/Services/ApplicationService.cs
new file mode 100644
index 0000000..a658f3a
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/Services/ApplicationService.cs
@@ -0,0 +1,62 @@
+using Mohaymen.FullTextSearch.Assets;
+using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
+using Mohaymen.FullTextSearch.DocumentManagement.Models;
+using Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;
+using Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService.SearchStrategies;
+using Mohaymen.FullTextSearch.MyWebApplication.Interfaces;
+
+namespace Mohaymen.FullTextSearch.MyWebApplication.Services;
+
+/// <summary>Indexes the documents folder once at construction and answers keyword searches over it.</summary>
+public class ApplicationService : IApplicationService
+{
+    private readonly IInvertedIndexBuilder _invertedIndexBuilder;
+    private readonly ISearcher _invertedIndexSearcher;
+    private readonly ILogger<ApplicationService> _logger;
+
+    public ApplicationService(IFileReader fileReader, IInvertedIndexBuilder invertedIndexBuilder, ILogger<ApplicationService> logger)
+    {
+        _logger = logger;
+        _invertedIndexBuilder = invertedIndexBuilder;
+
+        // The resource stores a Windows-style relative path ("assets\Documents"); normalize the
+        // separator so the folder is also found on platforms where '\' is not a separator.
+        var relativeDocumentsPath = Resources.DocumentsPath.Replace('\\', Path.DirectorySeparatorChar);
+        var documentsPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, relativeDocumentsPath);
+
+        var fileCollection = fileReader.ReadAllFiles(documentsPath);
+        var invertedIndex = IndexFiles(fileCollection);
+        _invertedIndexSearcher = new InvertedIndexSearcher(invertedIndex);
+    }
+
+    private IInvertedIndex IndexFiles(FileCollection fileCollection)
+    {
+        _logger.LogInformation("Processing files...");
+        var invertedIndex = _invertedIndexBuilder.IndexFilesWords(fileCollection).Build();
+        _logger.LogInformation("{fileCount} files loaded.", fileCollection.FilesCount());
+        return invertedIndex;
+    }
+
+    /// <summary>Applies the mandatory, optional and excluded filters in that order and returns the matching file names.</summary>
+    public IEnumerable<string> Search(List<string> mandatoryWords, List<string> optionalWords, List<string> excludedWords)
+    {
+        var query = new List<SearchQuery>
+        {
+            new SearchQuery(
+                new MandatorySearchStrategy(),
+                mandatoryWords.Select(word => new Keyword(word)).ToList()
+            ),
+            new SearchQuery(
+                new OptionalSearchStrategy(),
+                optionalWords.Select(word => new Keyword(word)).ToList()
+            ),
+            new SearchQuery(
+                new ExcludedSearchStrategy(),
+                excludedWords.Select(word => new Keyword(word)).ToList()
+            )
+        };
+        return _invertedIndexSearcher
+            .Search(query)
+            .Select(fullPath => Path.GetFileName(fullPath));
+    }
+}
\ No newline at end of file
diff --git a/06-web-api/src/MyWebApplication/appsettings.Development.json b/06-web-api/src/MyWebApplication/appsettings.Development.json
new file mode 100644
index 0000000..0c208ae
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/appsettings.Development.json
@@ -0,0 +1,8 @@
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Information",
+      "Microsoft.AspNetCore": "Warning"
+    }
+  }
+}
diff --git a/06-web-api/src/MyWebApplication/appsettings.json b/06-web-api/src/MyWebApplication/appsettings.json
new file mode 100644
index 0000000..10f68b8
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/appsettings.json
@@ -0,0 +1,9 @@
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Information",
+      "Microsoft.AspNetCore": "Warning"
+    }
+  },
+  "AllowedHosts": "*"
+}
diff --git a/06-web-api/src/MyWebApplication/assets/Documents/AddYourDocsHere.txt b/06-web-api/src/MyWebApplication/assets/Documents/AddYourDocsHere.txt
new file mode 100644
index 0000000..206f126
--- /dev/null
+++ b/06-web-api/src/MyWebApplication/assets/Documents/AddYourDocsHere.txt
@@ -0,0 +1 @@
+add your docs to this folder
\ No newline at end of file
diff --git a/06-web-api/src/MyWebApplication/assets/Resources.Designer.cs b/06-web-api/src/MyWebApplication/assets/Resources.Designer.cs
new file mode 100644
index 0000000..3ed2de9
--- /dev/null
+++
b/06-web-api/src/MyWebApplication/assets/Resources.Designer.cs @@ -0,0 +1,71 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +//------------------------------------------------------------------------------ + +namespace Mohaymen.FullTextSearch.Assets { + using System; + + + /// + /// A strongly-typed resource class, for looking up localized strings, etc. + /// + // This class was auto-generated by the StronglyTypedResourceBuilder + // class via a tool like ResGen or Visual Studio. + // To add or remove a member, edit your .ResX file then rerun ResGen + // with the /str option, or rebuild your VS project. + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + internal class Resources { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] + internal Resources() { + } + + /// + /// Returns the cached ResourceManager instance used by this class. 
+ /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Resources.ResourceManager ResourceManager { + get { + if (object.ReferenceEquals(resourceMan, null)) { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Mohaymen.FullTextSearch.MyWebApplication.assets.Resources", typeof(Resources).Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// + /// Overrides the current thread's CurrentUICulture property for all + /// resource lookups using this strongly typed resource class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Globalization.CultureInfo Culture { + get { + return resourceCulture; + } + set { + resourceCulture = value; + } + } + + /// + /// Looks up a localized string similar to assets\Documents. + /// + internal static string DocumentsPath { + get { + return ResourceManager.GetString("DocumentsPath", resourceCulture); + } + } + } +} diff --git a/06-web-api/src/MyWebApplication/assets/Resources.resx b/06-web-api/src/MyWebApplication/assets/Resources.resx new file mode 100644 index 0000000..c221b3e --- /dev/null +++ b/06-web-api/src/MyWebApplication/assets/Resources.resx @@ -0,0 +1,24 @@ + + + + + + + + + + text/microsoft-resx + + + 1.3 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + assets\Documents + + \ No newline at end of file diff --git a/06-web-api/test/DocumentManagement/Models/AdvancedInvertedIndexTest.cs b/06-web-api/test/DocumentManagement/Models/AdvancedInvertedIndexTest.cs new file mode 100644 index 0000000..49733dc --- /dev/null +++ 
using Mohaymen.FullTextSearch.DocumentManagement.Models;
using Mohaymen.FullTextSearch.DocumentManagement.Utilities;

namespace Mohaymen.FullTextSearch.Test.DocumentManagement.Models;

/// <summary>
/// Tests single-word and phrase lookups on <see cref="AdvancedInvertedIndex"/>.
/// </summary>
public class AdvancedInvertedIndexTest
{
    private readonly AdvancedInvertedIndex _advancedInvertedIndex;

    /// <summary>
    /// Seeds the index shared by the tests below.
    /// </summary>
    public AdvancedInvertedIndexTest()
    {
        _advancedInvertedIndex = new AdvancedInvertedIndex(new Tokenizer());

        // NOTE(review): assuming the second KeywordInfo argument is the word's position
        // in the document, the seeded layout is
        //   doc1: key1 @ 1, key2 @ 2   -> "key1 key2" is adjacent
        //   doc2: key2 @ 2, key1 @ 3   -> "key2 key1" is adjacent
        _advancedInvertedIndex.AddDocumentToKeyword(new Keyword("key1"), new KeywordInfo("doc1", 1));
        _advancedInvertedIndex.AddDocumentToKeyword(new Keyword("key1"), new KeywordInfo("doc2", 3));
        _advancedInvertedIndex.AddDocumentToKeyword(new Keyword("key2"), new KeywordInfo("doc1", 2));
        _advancedInvertedIndex.AddDocumentToKeyword(new Keyword("key2"), new KeywordInfo("doc2", 2));
    }

    [Fact]
    public void GetDocGetDocumentsByKeyword_ShouldWorkCorrectlyForPhrases()
    {
        // Act
        var documents1 = _advancedInvertedIndex.GetDocumentsByKeyword(new Keyword("key1 key2"));
        var documents2 = _advancedInvertedIndex.GetDocumentsByKeyword(new Keyword("key2 key1"));

        // Assert
        Assert.Equal(["doc1"], documents1);
        Assert.Equal(["doc2"], documents2);
    }

    [Fact]
    public void GetDocGetDocumentsByKeyword_ShouldWorkCorrectlyForWords()
    {
        // Act
        var documents1 = _advancedInvertedIndex.GetDocumentsByKeyword(new Keyword("key1"));

        // Assert
        // Expected value goes first so that a failure is reported with the right labels.
        Assert.Equal(["doc1", "doc2"], documents1);
    }
}
/// <summary>
/// Tests for <see cref="FileReader"/>. The constructor creates two temporary
/// directories (one with two text files, one empty) and <see cref="Dispose"/>
/// removes both of them again.
/// </summary>
public class FileReaderTest : IDisposable
{
    private readonly FileReader _fileReader;
    private readonly string _testDirectory;
    private readonly string _emptyFolderPath;

    public FileReaderTest()
    {
        _testDirectory = CreateTestDirectoryWithFiles();
        _emptyFolderPath = CreateEmptyDirectory();
        _fileReader = new FileReader();
    }

    /// <summary>
    /// Creates a uniquely named temp directory containing file1.txt and file2.txt.
    /// </summary>
    /// <returns>The full path of the created directory.</returns>
    private static string CreateTestDirectoryWithFiles()
    {
        var testDirectory = CreateEmptyDirectory();

        File.WriteAllText(Path.Combine(testDirectory, "file1.txt"), "Content of file1");
        File.WriteAllText(Path.Combine(testDirectory, "file2.txt"), "Content of file2");

        return testDirectory;
    }

    /// <summary>
    /// Creates a uniquely named, empty directory under the system temp path.
    /// </summary>
    /// <returns>The full path of the created directory.</returns>
    private static string CreateEmptyDirectory()
    {
        var emptyFolderPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
        Directory.CreateDirectory(emptyFolderPath);

        return emptyFolderPath;
    }

    /// <summary>
    /// Recursively deletes <paramref name="path"/> when it exists; does nothing otherwise.
    /// </summary>
    private static void DeleteDirectoryIfExists(string path)
    {
        if (Directory.Exists(path))
        {
            Directory.Delete(path, recursive: true);
        }
    }

    [Fact]
    public void ReadAllFiles_ShouldReadAllFilesInADirectory()
    {
        // Arrange
        var firstFilePath = Path.Combine(_testDirectory, "file1.txt");
        var secondFilePath = Path.Combine(_testDirectory, "file2.txt");

        // Act
        var result = _fileReader.ReadAllFiles(_testDirectory);

        // Assert
        Assert.Equal(2, result.FilesCount());
        Assert.Contains(firstFilePath, result.GetFilesPath());
        Assert.Contains(secondFilePath, result.GetFilesPath());
        Assert.Equal("Content of file1", result.GetFileContent(firstFilePath));
        Assert.Equal("Content of file2", result.GetFileContent(secondFilePath));
    }

    [Fact]
    public void ReadAllFiles_ShouldHandleEmptyDirectory()
    {
        // Act
        var result = _fileReader.ReadAllFiles(_emptyFolderPath);

        // Assert
        Assert.Equal(0, result.FilesCount());
    }

    /// <summary>
    /// Removes every temporary directory created by the constructor.
    /// </summary>
    public void Dispose()
    {
        DeleteDirectoryIfExists(_testDirectory);
        // The empty directory is created per test as well and must not be left behind.
        DeleteDirectoryIfExists(_emptyFolderPath);
    }
}
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
using Mohaymen.FullTextSearch.DocumentManagement.Models;
using Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;
using Mohaymen.FullTextSearch.DocumentManagement.Utilities;
using NSubstitute;

namespace Mohaymen.FullTextSearch.Test.DocumentManagement.Services.InvertedIndex;

/// <summary>
/// Checks that <see cref="FilesAdvancedInvertedIndexBuilder"/> produces the same index
/// as one populated by hand through <c>AddDocumentToKeyword</c>.
/// </summary>
public class FilesAdvancedInvertedIndexBuilderTest
{
    private readonly ITokenizer _tokenizer = new Tokenizer();
    private readonly FilesAdvancedInvertedIndexBuilder _filesAdvancedInvertedIndexBuilder;

    public FilesAdvancedInvertedIndexBuilderTest()
    {
        _filesAdvancedInvertedIndexBuilder = new FilesAdvancedInvertedIndexBuilder(_tokenizer);
    }

    [Fact]
    public void IndexFilesWords_ValidFileCollection_ShouldIndexAllWords()
    {
        // Arrange
        (string Name, string Content)[] files =
        [
            ("doc1.txt", "star academy star"),
            ("doc2.txt", "star coder academy"),
            ("doc3.txt", "academy coder"),
            ("doc4.txt", "summer"),
        ];

        var fileCollection = new FileCollection();
        foreach (var (name, content) in files)
        {
            fileCollection.AddFile(name, content);
        }

        var expectedInvertedIndex = new AdvancedInvertedIndex(_tokenizer);

        // Registers one expected occurrence; the calls below keep document order, then word order.
        void Expect(string word, KeywordInfo occurrence) =>
            expectedInvertedIndex.AddDocumentToKeyword(new Keyword(word), occurrence);

        Expect("star", new KeywordInfo("doc1.txt", 0));
        Expect("academy", new KeywordInfo("doc1.txt", 1));
        Expect("star", new KeywordInfo("doc1.txt", 2));
        Expect("star", new KeywordInfo("doc2.txt", 0));
        Expect("coder", new KeywordInfo("doc2.txt", 1));
        Expect("academy", new KeywordInfo("doc2.txt", 2));
        Expect("academy", new KeywordInfo("doc3.txt", 0));
        Expect("coder", new KeywordInfo("doc3.txt", 1));
        Expect("summer", new KeywordInfo("doc4.txt", 0));

        // Act
        IInvertedIndex invertedIndex = _filesAdvancedInvertedIndexBuilder
            .IndexFilesWords(fileCollection)
            .Build();

        // Assert
        Assert.Equal(expectedInvertedIndex, invertedIndex);
    }
}
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
using Mohaymen.FullTextSearch.DocumentManagement.Models;
using Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;
using Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService.SearchStrategies;
using NSubstitute;

namespace Mohaymen.FullTextSearch.Test.DocumentManagement.Services.InvertedIndex;

/// <summary>
/// Tests <see cref="InvertedIndexSearcher"/> against a substituted <see cref="IInvertedIndex"/>
/// when a mandatory, an optional and an excluded query are combined.
/// </summary>
public class InvertedIndexSearcherTest
{
    [Fact]
    public void Search_WithMultipleStrategies_CombinesFiltersCorrectly()
    {
        // Arrange
        var searcher = CreateSearcher(new Dictionary<Keyword, HashSet<string>>
        {
            { new Keyword("star"), ["doc1.txt", "doc2.txt"] },
            { new Keyword("academy"), ["doc1.txt", "doc3.txt"] },
            { new Keyword("coder"), ["doc2.txt", "doc3.txt"] },
            { new Keyword("summer"), ["doc4.txt"] }
        });
        var queries = CreateQueries(mandatory: "academy", optional: "coder", excluded: "star");

        // Act
        var results = searcher.Search(queries);

        // Assert
        // doc3.txt is the only document listed for "academy" and "coder" but not for "star".
        AssertSameDocuments(["doc3.txt"], results);
    }

    /// <summary>
    /// Same combination as above, but the mandatory keyword is the phrase "star academy".
    /// </summary>
    [Fact]
    public void Search_WithMultipleStrategies_CombinesFiltersCorrectly_When()
    {
        // Arrange
        var searcher = CreateSearcher(new Dictionary<Keyword, HashSet<string>>
        {
            { new Keyword("star academy"), ["doc1.txt", "doc2.txt"] },
            { new Keyword("academy"), ["doc1.txt", "doc2.txt", "doc3.txt"] },
            { new Keyword("star"), ["doc1.txt", "doc2.txt", "doc4.txt"] },
            { new Keyword("coder"), ["doc2.txt", "doc3.txt"] },
            { new Keyword("summer"), ["doc1.txt", "doc2.txt", "doc4.txt"] }
        });
        var queries = CreateQueries(mandatory: "star academy", optional: "summer", excluded: "coder");

        // Act
        var results = searcher.Search(queries);

        // Assert
        // doc1.txt is the only document listed for "star academy" and "summer" but not for "coder".
        AssertSameDocuments(["doc1.txt"], results);
    }

    /// <summary>
    /// Builds a searcher over a substitute index that answers each keyword with the mapped
    /// documents and reports doc1.txt through doc4.txt as the complete document set.
    /// </summary>
    /// <param name="keywordDocumentMapping">Documents the substitute returns per keyword.</param>
    private static InvertedIndexSearcher CreateSearcher(Dictionary<Keyword, HashSet<string>> keywordDocumentMapping)
    {
        var invertedIndex = Substitute.For<IInvertedIndex>();

        foreach (var (keyword, documents) in keywordDocumentMapping)
        {
            invertedIndex.GetDocumentsByKeyword(keyword).Returns(documents);
        }

        invertedIndex.AllDocuments.Returns(["doc1.txt", "doc2.txt", "doc3.txt", "doc4.txt"]);

        return new InvertedIndexSearcher(invertedIndex);
    }

    /// <summary>
    /// Creates one single-keyword query per strategy, in mandatory, optional, excluded order.
    /// </summary>
    private static List<SearchQuery> CreateQueries(string mandatory, string optional, string excluded) =>
    [
        new SearchQuery(new MandatorySearchStrategy(), new List<Keyword> { new Keyword(mandatory) }),
        new SearchQuery(new OptionalSearchStrategy(), new List<Keyword> { new Keyword(optional) }),
        new SearchQuery(new ExcludedSearchStrategy(), new List<Keyword> { new Keyword(excluded) })
    ];

    /// <summary>
    /// Asserts that <paramref name="actual"/> holds exactly the documents in
    /// <paramref name="expected"/>, ignoring order, and prints both sets on failure.
    /// </summary>
    private static void AssertSameDocuments(HashSet<string> expected, IEnumerable<string> actual)
    {
        Assert.True(
            expected.SetEquals(actual),
            $"Expected: {string.Join(", ", expected)}, Actual: {string.Join(", ", actual)}");
    }
}
public void ExtractKeywords_ShouldTokenizeText_WhenStringContainsSingleQuotationAndSpace(string text, List<Keyword> expectedKeywords)
{
    // Each data row supplies an input text and the keywords the tokenizer must return for it.

    // Arrange
    Tokenizer sut = new();

    // Act
    var actualKeywords = sut.ExtractKeywords(text);

    // Assert
    Assert.Equal(expectedKeywords, actualKeywords);
}