Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/web api #7

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions 06-web-api/full-text-search.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DocumentManagement", "src\DocumentManagement\DocumentManagement.csproj", "{D471FD38-26BA-4DEF-96A2-982F235AEA01}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "test", "test\test.csproj", "{FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MyWebApplication", "src\MyWebApplication\MyWebApplication.csproj", "{FD4AE171-A581-4FD2-899C-C1BB5EAFE641}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{D471FD38-26BA-4DEF-96A2-982F235AEA01}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D471FD38-26BA-4DEF-96A2-982F235AEA01}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D471FD38-26BA-4DEF-96A2-982F235AEA01}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D471FD38-26BA-4DEF-96A2-982F235AEA01}.Release|Any CPU.Build.0 = Release|Any CPU
{FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FD776EED-DBA5-4789-A1A7-FF97EF11FBD3}.Release|Any CPU.Build.0 = Release|Any CPU
{FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FD4AE171-A581-4FD2-899C-C1BB5EAFE641}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
10 changes: 10 additions & 0 deletions 06-web-api/src/DocumentManagement/DocumentManagement.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>Mohaymen.FullTextSearch.DocumentManagement</RootNamespace>
</PropertyGroup>

</Project>
8 changes: 8 additions & 0 deletions 06-web-api/src/DocumentManagement/Interfaces/IFileReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
using Mohaymen.FullTextSearch.DocumentManagement.Models;

namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

/// <summary>
/// Abstraction over loading the files of a folder into a <see cref="FileCollection"/>.
/// </summary>
public interface IFileReader
{
/// <summary>
/// Reads the files located at <paramref name="folderPath"/>.
/// </summary>
/// <param name="folderPath">Path of the folder to read from.</param>
/// <returns>A <see cref="FileCollection"/> holding what was read.</returns>
FileCollection ReadAllFiles(string folderPath);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using Mohaymen.FullTextSearch.DocumentManagement.Models;

namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

/// <summary>
/// Read side of an inverted index: exposes the known documents and a lookup
/// from a keyword to the documents associated with it.
/// </summary>
public interface IInvertedIndex
{
/// <summary>
/// Identifiers of the documents registered in the index.
/// </summary>
HashSet<string> AllDocuments { get; }
/// <summary>
/// Looks up the documents associated with <paramref name="keyword"/>.
/// </summary>
/// <param name="keyword">Keyword to look up.</param>
/// <returns>The set of matching document identifiers.</returns>
HashSet<string> GetDocumentsByKeyword(Keyword keyword);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using Mohaymen.FullTextSearch.DocumentManagement.Models;

namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

/// <summary>
/// Builder that indexes file contents and hands out the resulting
/// <see cref="IInvertedIndex"/>.
/// </summary>
public interface IInvertedIndexBuilder
{
/// <summary>
/// Returns the index produced by this builder.
/// </summary>
/// <returns>The built <see cref="IInvertedIndex"/>.</returns>
IInvertedIndex Build();
/// <summary>
/// Indexes the words of the files in <paramref name="fileCollection"/>.
/// </summary>
/// <param name="fileCollection">Files whose contents should be indexed.</param>
/// <returns>A builder, so that calls can be chained before <see cref="Build"/>.</returns>
IInvertedIndexBuilder IndexFilesWords(FileCollection fileCollection);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
using Mohaymen.FullTextSearch.DocumentManagement.Models;

namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

/// <summary>
/// Strategy that filters a set of candidate documents using a list of keywords
/// and an inverted index.
/// </summary>
public interface ISearchStrategy
{
/// <summary>
/// Applies this strategy to <paramref name="documents"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the method returns <c>void</c>, so implementations presumably
/// mutate <paramref name="documents"/> in place - confirm against the implementations.
/// </remarks>
/// <param name="documents">Candidate document identifiers to filter.</param>
/// <param name="keywords">Keywords this strategy works with.</param>
/// <param name="invertedIndex">Index used to resolve keywords to documents.</param>
void FilterDocuments(HashSet<string> documents, List<Keyword> keywords, IInvertedIndex invertedIndex);
}
7 changes: 7 additions & 0 deletions 06-web-api/src/DocumentManagement/Interfaces/ISearcher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
using Mohaymen.FullTextSearch.DocumentManagement.Models;
namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

/// <summary>
/// Executes a list of <see cref="SearchQuery"/> items and returns the matching results.
/// </summary>
/// <typeparam name="T">Type of a single search result.</typeparam>
public interface ISearcher<T>
{
/// <summary>
/// Runs the given queries.
/// </summary>
/// <remarks>
/// NOTE(review): how the individual queries are combined is decided by the
/// implementation, which is not defined by this interface.
/// </remarks>
/// <param name="query">Queries to execute, each pairing a strategy with its keywords.</param>
/// <returns>The collection of results.</returns>
ICollection<T> Search(List<SearchQuery> query);
}
8 changes: 8 additions & 0 deletions 06-web-api/src/DocumentManagement/Interfaces/ITokenizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
using Mohaymen.FullTextSearch.DocumentManagement.Models;

namespace Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

/// <summary>
/// Splits raw text into a list of <see cref="Keyword"/> items.
/// </summary>
public interface ITokenizer
{
/// <summary>
/// Extracts the keywords contained in <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <returns>The extracted keywords as a list.</returns>
List<Keyword> ExtractKeywords(string text);
}
90 changes: 90 additions & 0 deletions 06-web-api/src/DocumentManagement/Models/AdvancedInvertedIndex.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

namespace Mohaymen.FullTextSearch.DocumentManagement.Models;

/// <summary>
/// Positional inverted index. Every keyword maps to the set of
/// (document, position) occurrences, which lets a multi-word phrase be matched
/// as a run of consecutive positions inside the same document.
/// </summary>
/// <remarks>
/// Equality is structural (same keyword map and same document set). The hash code
/// is derived from the same content, so it changes whenever the index is mutated;
/// do not use an instance as a dictionary/set key while it is still being built.
/// </remarks>
public class AdvancedInvertedIndex : IInvertedIndex, IEquatable<AdvancedInvertedIndex>
{
    private readonly Dictionary<Keyword, HashSet<KeywordInfo>> _invertedIndexMap = [];
    private readonly ITokenizer _tokenizer;

    /// <summary>Identifiers of every document that has at least one indexed occurrence.</summary>
    public HashSet<string> AllDocuments { get; } = [];

    /// <summary>Creates an empty index.</summary>
    /// <param name="tokenizer">Tokenizer used to split a searched phrase into its words.</param>
    public AdvancedInvertedIndex(ITokenizer tokenizer)
    {
        _tokenizer = tokenizer;
    }

    /// <summary>
    /// Records one occurrence of <paramref name="keyword"/> and registers the
    /// occurrence's document in <see cref="AllDocuments"/>.
    /// </summary>
    /// <param name="keyword">Keyword that occurred.</param>
    /// <param name="keywordInfo">Document and position of the occurrence.</param>
    public void AddDocumentToKeyword(Keyword keyword, KeywordInfo keywordInfo)
    {
        AllDocuments.Add(keywordInfo.Document);

        // Single lookup instead of ContainsKey + Add + indexer.
        if (!_invertedIndexMap.TryGetValue(keyword, out var occurrences))
        {
            occurrences = [];
            _invertedIndexMap.Add(keyword, occurrences);
        }

        occurrences.Add(keywordInfo);
    }

    /// <summary>
    /// Finds the documents that contain <paramref name="phrase"/> as consecutive words.
    /// </summary>
    /// <param name="phrase">
    /// Keyword whose <see cref="Keyword.Word"/> holds the phrase; it is split into
    /// words with the tokenizer supplied to the constructor.
    /// </param>
    /// <returns>
    /// A new set with the matching document identifiers; empty when the phrase yields
    /// no words or when any of its words is not present in the index.
    /// </returns>
    public HashSet<string> GetDocumentsByKeyword(Keyword phrase)
    {
        var phraseWords = _tokenizer.ExtractKeywords(phrase.Word);

        if (phraseWords.Count == 0)
        {
            return [];
        }

        // Resolve every word once; a single unknown word rules out any match.
        var postings = new List<HashSet<KeywordInfo>>(phraseWords.Count);
        foreach (var phraseWord in phraseWords)
        {
            if (!_invertedIndexMap.TryGetValue(phraseWord, out var wordOccurrences))
            {
                return [];
            }

            postings.Add(wordOccurrences);
        }

        // "matches" always holds the occurrences of the most recently processed word
        // that terminate a valid prefix of the phrase.
        var matches = postings[0];

        for (var i = 1; i < postings.Count; i++)
        {
            var previousMatches = matches;

            // Keep an occurrence only if the previous word matched right before it
            // in the same document.
            matches = postings[i]
                .Where(occurrence => previousMatches.Contains(
                    new KeywordInfo(occurrence.Document, occurrence.Position - 1)))
                .ToHashSet();
        }

        return matches.Select(occurrence => occurrence.Document).ToHashSet();
    }

    /// <summary>
    /// Structural equality: both indexes hold the same keywords with the same
    /// occurrences, and the same set of documents.
    /// </summary>
    public bool Equals(AdvancedInvertedIndex? other)
    {
        if (other is null)
        {
            return false;
        }

        if (ReferenceEquals(this, other))
        {
            return true;
        }

        if (_invertedIndexMap.Count != other._invertedIndexMap.Count)
        {
            return false;
        }

        foreach (var (keyword, occurrences) in _invertedIndexMap)
        {
            if (!other._invertedIndexMap.TryGetValue(keyword, out var otherOccurrences))
            {
                return false;
            }

            if (!occurrences.SetEquals(otherOccurrences))
            {
                return false;
            }
        }

        return AllDocuments.SetEquals(other.AllDocuments);
    }

    /// <inheritdoc />
    public override bool Equals(object? obj)
    {
        if (obj is null)
        {
            return false;
        }

        if (ReferenceEquals(this, obj))
        {
            return true;
        }

        if (obj.GetType() != GetType())
        {
            return false;
        }

        return Equals((AdvancedInvertedIndex)obj);
    }

    /// <summary>
    /// Content-based hash that agrees with <see cref="Equals(AdvancedInvertedIndex)"/>.
    /// </summary>
    /// <remarks>
    /// Hashing the collection instances themselves would be reference-based and would
    /// give two structurally equal indexes different hash codes. XOR is used because
    /// neither the dictionary nor the set has a defined enumeration order.
    /// </remarks>
    public override int GetHashCode()
    {
        var keywordsHash = 0;
        foreach (var keyword in _invertedIndexMap.Keys)
        {
            keywordsHash ^= keyword.GetHashCode();
        }

        var documentsHash = 0;
        foreach (var document in AllDocuments)
        {
            documentsHash ^= document?.GetHashCode() ?? 0;
        }

        return HashCode.Combine(
            _invertedIndexMap.Count,
            keywordsHash,
            AllDocuments.Count,
            documentsHash);
    }
}
21 changes: 21 additions & 0 deletions 06-web-api/src/DocumentManagement/Models/FileCollection.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
namespace Mohaymen.FullTextSearch.DocumentManagement.Models;

/// <summary>
/// In-memory set of files, keyed by file path and holding each file's text content.
/// </summary>
public class FileCollection
{
    private readonly Dictionary<string, string> _contentByPath = [];

    /// <summary>Registers a file; a path that is already present is rejected by the dictionary.</summary>
    /// <param name="filePath">Path used as the file's key.</param>
    /// <param name="fileContent">Text content of the file.</param>
    public void AddFile(string filePath, string fileContent)
    {
        _contentByPath.Add(filePath, fileContent);
    }

    /// <summary>Returns a fresh list with the paths of all registered files.</summary>
    public List<string> GetFilesPath()
    {
        return new List<string>(_contentByPath.Keys);
    }

    /// <summary>Returns the content stored for <paramref name="filePath"/>.</summary>
    /// <param name="filePath">Path of a previously added file.</param>
    public string GetFileContent(string filePath)
    {
        return _contentByPath[filePath];
    }

    /// <summary>Tells whether a file with the given path has been added.</summary>
    public bool ContainsFile(string filePath)
    {
        return _contentByPath.ContainsKey(filePath);
    }

    /// <summary>Number of registered files.</summary>
    public int FilesCount()
    {
        return _contentByPath.Count;
    }
}
26 changes: 26 additions & 0 deletions 06-web-api/src/DocumentManagement/Models/InvertedIndex.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

namespace Mohaymen.FullTextSearch.DocumentManagement.Models;

/// <summary>
/// Plain inverted index: maps each keyword to the set of documents that contain it.
/// </summary>
public class InvertedIndex : IInvertedIndex
{
    private readonly Dictionary<Keyword, HashSet<string>> _invertedIndexMap = [];

    /// <summary>Identifiers of every document that was added for at least one keyword.</summary>
    public HashSet<string> AllDocuments { get; } = [];

    /// <summary>
    /// Associates <paramref name="document"/> with <paramref name="keyword"/> and
    /// registers the document in <see cref="AllDocuments"/>.
    /// </summary>
    /// <param name="keyword">Keyword found in the document.</param>
    /// <param name="document">Identifier of the document.</param>
    public void AddDocumentToKeyword(Keyword keyword, string document)
    {
        AllDocuments.Add(document);

        // Single lookup instead of ContainsKey + Add + indexer.
        if (!_invertedIndexMap.TryGetValue(keyword, out var documents))
        {
            documents = [];
            _invertedIndexMap.Add(keyword, documents);
        }

        documents.Add(document);
    }

    /// <summary>
    /// Returns the documents associated with <paramref name="keyword"/>.
    /// </summary>
    /// <param name="keyword">Keyword to look up.</param>
    /// <returns>
    /// The set stored in the index for a known keyword (the live internal instance,
    /// not a copy, so callers should treat it as read-only), or a new empty set
    /// when the keyword is unknown.
    /// </returns>
    public HashSet<string> GetDocumentsByKeyword(Keyword keyword)
    {
        if (_invertedIndexMap.TryGetValue(keyword, out var documents))
        {
            return documents;
        }

        return [];
    }
}
11 changes: 11 additions & 0 deletions 06-web-api/src/DocumentManagement/Models/Keyword.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Mohaymen.FullTextSearch.DocumentManagement.Models;

/// <summary>
/// A search/index term. The word is stored upper-cased so that keywords compare
/// case-insensitively through the record's value equality.
/// </summary>
public record Keyword
{
    private readonly string _word;

    /// <summary>Creates a keyword from <paramref name="word"/>.</summary>
    /// <param name="word">Raw word; it is normalised to upper case.</param>
    /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
    public Keyword(string word)
    {
        _word = Normalize(word);
    }

    /// <summary>
    /// The normalised (upper-case) word. Values assigned through an initializer or a
    /// <c>with</c> expression are normalised the same way as constructor input.
    /// </summary>
    public string Word
    {
        get => _word;
        init => _word = Normalize(value);
    }

    // Invariant casing keeps the normalised form identical regardless of the current
    // culture (e.g. "i" must not become a dotted capital I under tr-TR).
    private static string Normalize(string word)
    {
        ArgumentNullException.ThrowIfNull(word);
        return word.ToUpperInvariant();
    }
}
3 changes: 3 additions & 0 deletions 06-web-api/src/DocumentManagement/Models/KeywordInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
namespace Mohaymen.FullTextSearch.DocumentManagement.Models;

/// <summary>
/// One occurrence of a keyword: the document it appears in and its position there.
/// </summary>
/// <param name="Document">Identifier of the document containing the keyword.</param>
/// <param name="Position">
/// Position of the keyword inside the document.
/// NOTE(review): presumably the zero-based index within the document's token list - confirm with the index builder.
/// </param>
public record KeywordInfo(string Document, int Position);
5 changes: 5 additions & 0 deletions 06-web-api/src/DocumentManagement/Models/SearchQuery.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;

namespace Mohaymen.FullTextSearch.DocumentManagement.Models;

/// <summary>
/// A single search instruction: a strategy together with the keywords it applies to.
/// </summary>
/// <param name="SearchStrategy">Strategy used to filter documents for this query.</param>
/// <param name="Keywords">Keywords handed to the strategy.</param>
public record SearchQuery(ISearchStrategy SearchStrategy, List<Keyword> Keywords);
25 changes: 25 additions & 0 deletions 06-web-api/src/DocumentManagement/Services/Files/FileReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
using Mohaymen.FullTextSearch.DocumentManagement.Models;
namespace Mohaymen.FullTextSearch.DocumentManagement.Services.FilesService;

/// <summary>
/// <see cref="IFileReader"/> implementation that loads files from the local file system.
/// </summary>
public class FileReader : IFileReader
{
    /// <summary>
    /// Reads the text of every file returned by <see cref="Directory.GetFiles(string)"/>
    /// for <paramref name="folderPath"/> into a new <see cref="FileCollection"/>,
    /// keyed by file path.
    /// </summary>
    /// <param name="folderPath">Folder whose files are read.</param>
    /// <returns>The collection with one entry per file path.</returns>
    public FileCollection ReadAllFiles(string folderPath)
    {
        var paths = Directory.GetFiles(folderPath);
        var collection = new FileCollection();

        foreach (var path in paths)
        {
            // A path that is already present is skipped rather than read again.
            if (collection.ContainsFile(path))
            {
                continue;
            }

            collection.AddFile(path, File.ReadAllText(path));
        }

        return collection;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
using Mohaymen.FullTextSearch.DocumentManagement.Models;

namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;

/// <summary>
/// Builds an <see cref="AdvancedInvertedIndex"/> from file contents, recording for
/// every keyword the file it came from and its index in that file's keyword list.
/// </summary>
public class FilesAdvancedInvertedIndexBuilder : IInvertedIndexBuilder
{
    // Assigned once in the constructor; readonly to match FilesInvertedIndexBuilder.
    private readonly AdvancedInvertedIndex _advancedInvertedIndex;
    private readonly ITokenizer _tokenizer;

    /// <summary>Creates a builder with an empty index.</summary>
    /// <param name="tokenizer">
    /// Tokenizer used to split file contents; the same instance is handed to the index.
    /// </param>
    public FilesAdvancedInvertedIndexBuilder(ITokenizer tokenizer)
    {
        _tokenizer = tokenizer;
        _advancedInvertedIndex = new AdvancedInvertedIndex(tokenizer);
    }

    /// <summary>
    /// Tokenizes every file of <paramref name="fileCollection"/> and adds its keywords
    /// to the index.
    /// </summary>
    /// <param name="fileCollection">Files to index.</param>
    /// <returns>This builder, to allow chaining.</returns>
    public IInvertedIndexBuilder IndexFilesWords(FileCollection fileCollection)
    {
        // NOTE(review): a file that yields no keywords is never passed to the index,
        // so it will not appear in AllDocuments - confirm this is intended.
        foreach (var filePath in fileCollection.GetFilesPath())
        {
            var keywords = _tokenizer.ExtractKeywords(fileCollection.GetFileContent(filePath));
            UpdateInvertedIndexMap(keywords, filePath);
        }

        return this;
    }

    // Adds each keyword with its index in the list as the occurrence position.
    private void UpdateInvertedIndexMap(List<Keyword> keywords, string filePath)
    {
        for (var position = 0; position < keywords.Count; position++)
        {
            _advancedInvertedIndex.AddDocumentToKeyword(
                keywords[position],
                new KeywordInfo(filePath, position));
        }
    }

    /// <summary>
    /// Returns the index populated so far. The same instance is returned on every call.
    /// </summary>
    public IInvertedIndex Build()
    {
        return _advancedInvertedIndex;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
using Mohaymen.FullTextSearch.DocumentManagement.Interfaces;
using Mohaymen.FullTextSearch.DocumentManagement.Models;
using Mohaymen.FullTextSearch.DocumentManagement.Utilities;

namespace Mohaymen.FullTextSearch.DocumentManagement.Services.InvertedIndexService;

/// <summary>
/// Builds an <see cref="InvertedIndex"/> that maps each keyword to the files containing it.
/// </summary>
public class FilesInvertedIndexBuilder : IInvertedIndexBuilder
{
    private readonly InvertedIndex _invertedIndex = new();
    private readonly ITokenizer _tokenizer;

    /// <summary>Creates a builder with an empty index.</summary>
    /// <param name="tokenizer">Tokenizer used to split file contents into keywords.</param>
    public FilesInvertedIndexBuilder(ITokenizer tokenizer) => _tokenizer = tokenizer;

    /// <summary>
    /// Tokenizes every file of <paramref name="fileCollection"/> and registers each
    /// extracted keyword against the file's path.
    /// </summary>
    /// <param name="fileCollection">Files to index.</param>
    /// <returns>This builder, to allow chaining.</returns>
    public IInvertedIndexBuilder IndexFilesWords(FileCollection fileCollection)
    {
        foreach (var path in fileCollection.GetFilesPath())
        {
            var content = fileCollection.GetFileContent(path);
            var extracted = _tokenizer.ExtractKeywords(content);

            foreach (var term in extracted)
            {
                _invertedIndex.AddDocumentToKeyword(term, path);
            }
        }

        return this;
    }

    /// <summary>
    /// Returns the index populated so far. The same instance is returned on every call.
    /// </summary>
    public IInvertedIndex Build() => _invertedIndex;
}
Loading