Skip to content

Commit

Permalink
Merge pull request #11 from luleyleo/core-next
Browse files Browse the repository at this point in the history
Rewrite search engine
  • Loading branch information
luleyleo authored Oct 7, 2024
2 parents 97f210d + f06187e commit dc54cc2
Show file tree
Hide file tree
Showing 21 changed files with 1,108 additions and 1,399 deletions.
360 changes: 165 additions & 195 deletions Cargo.lock

Large diffs are not rendered by default.

775 changes: 368 additions & 407 deletions build-aux/cargo-sources.json

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
name = "clapgrep-core"
version = "1.0.0"
edition = "2021"
description = "Clapgrep core library"

[dependencies]
regex = "1.10" # for pattern matching
num_cpus = "1.16.0" # for parallel search
grep = "0.3" # ripgrep
ignore = "0.4" # parallel walker from ripgrep
walkdir = "2.4" # walker from ripgrep
termcolor = "1.4" # only used for grep to get nocolor
pdf-extract = "0.7.9" # for pdf
dotext = "0.1.1" # for office docs
regex = "1.10"
grep = "0.3"
ignore = "0.4"
flume = "0.11.0"

# for office support
dotext = "0.1.1"

# for pdf support
pdf-extract = "0.7.9"
euclid = "0.20.5"
3 changes: 0 additions & 3 deletions core/README.md

This file was deleted.

76 changes: 76 additions & 0 deletions core/src/engine.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use crate::{
search::{self, SearchId, SearchParameters, SharedSearchId},
SearchMessage,
};
use flume::{Receiver, Sender};
use std::{
path::PathBuf,
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
thread,
};

/// Handle used to start and cancel searches and to receive their messages.
///
/// Bundles both halves of a `flume` channel carrying [`SearchMessage`]s with a
/// shared counter that identifies the most recently started (or cancelled)
/// search.
pub struct SearchEngine {
/// Sending half of the message channel; used by `send_error`.
pub(crate) sender: Sender<SearchMessage>,
/// Receiving half of the message channel; clones are handed out by `receiver()`.
pub(crate) receiver: Receiver<SearchMessage>,
/// Shared counter bumped by `search()` and `cancel()` and read by `is_current()`.
pub(crate) current_search_id: SharedSearchId,
}

impl Default for SearchEngine {
    /// Builds an engine around a fresh unbounded channel, with the shared
    /// search counter starting at zero.
    fn default() -> Self {
        let (tx, rx) = flume::unbounded();
        let initial_id = Arc::new(AtomicUsize::new(0));

        Self {
            sender: tx,
            receiver: rx,
            current_search_id: initial_id,
        }
    }
}

impl SearchEngine {
pub fn receiver(&self) -> Receiver<SearchMessage> {
self.receiver.clone()
}

pub fn search(&self, params: SearchParameters) {
self.current_search_id.fetch_add(1, Ordering::Release);

let engine = self.clone();
thread::spawn(move || search::run(engine, params));
}

pub fn cancel(&self) {
self.current_search_id.fetch_add(1, Ordering::Release);
}

pub fn is_current(&self, message: &SearchMessage) -> bool {
let current = self.current_search_id.load(Ordering::Acquire);

message.search() == current
}

pub(crate) fn clone(&self) -> Self {
SearchEngine {
sender: self.sender.clone(),
receiver: self.receiver.clone(),
current_search_id: self.current_search_id.clone(),
}
}

pub(crate) fn send_error(
&self,
search: SearchId,
path: PathBuf,
message: String,
) -> Result<(), flume::SendError<SearchMessage>> {
self.sender
.send(SearchMessage::Error(crate::result::SearchError {
search,
path,
message,
}))
}
}
96 changes: 0 additions & 96 deletions core/src/extended.rs

This file was deleted.

9 changes: 9 additions & 0 deletions core/src/extra/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use crate::search::SearchSink;
use grep::{regex::RegexMatcher, searcher::Searcher};
use std::{error::Error, path::Path};

pub mod office;
pub mod pdf;

/// Function-pointer signature for handlers of extra file formats.
///
/// A handler receives the searcher, the compiled matcher, the path of the file
/// to search and the sink that collects results, and returns `Ok(())` or a
/// boxed error.
// NOTE(review): `office::process` and `pdf::process` have this exact signature;
// presumably they are the intended implementors — confirm against the caller.
pub type ExtraFn =
fn(&mut Searcher, &RegexMatcher, &Path, &mut SearchSink) -> Result<(), Box<dyn Error>>;
55 changes: 55 additions & 0 deletions core/src/extra/office.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use crate::search::SearchSink;
use dotext::{doc::OpenOfficeDoc, *};
use grep::{regex::RegexMatcher, searcher::Searcher};
use std::{error::Error, io::Read, path::Path};

/// File extensions (without the leading dot) associated with office documents.
// NOTE(review): "ods" is listed here, but `extract` has no active arm for it,
// so such files currently end in the "unknown extension" error.
pub static EXTENSIONS: &[&str] = &["docx", "pptx", "xlsx", "odt", "odp", "ods"];

/// Extracts the text of the office document at `path` and runs `searcher`
/// with `matcher` over it, reporting into `sink`.
///
/// # Errors
///
/// Returns any error from text extraction or from the search itself.
pub fn process(
    searcher: &mut Searcher,
    matcher: &RegexMatcher,
    path: &Path,
    sink: &mut SearchSink,
) -> Result<(), Box<dyn Error>> {
    let contents = extract(path)?;
    let haystack = contents.as_bytes();

    searcher.search_slice(matcher, haystack, sink)?;
    Ok(())
}

/// Reads the textual content of the office document at `path`.
///
/// The document type is chosen from the file extension, compared exactly
/// (case-sensitively) against the supported names.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read, or the
/// `"unknown extension"` error if the extension matches no arm below.
fn extract(path: &Path) -> Result<String, Box<dyn Error>> {
    // A missing extension becomes the empty string and falls through to the
    // error arm.
    let ext = path.extension().unwrap_or_default().to_string_lossy();

    let mut text = String::new();
    match &*ext {
        "docx" => Docx::open(path)?.read_to_string(&mut text)?,
        "xlsx" => Xlsx::open(path)?.read_to_string(&mut text)?,
        "pptx" => Pptx::open(path)?.read_to_string(&mut text)?,
        "odt" => Odt::open(path)?.read_to_string(&mut text)?,
        "odp" => Odp::open(path)?.read_to_string(&mut text)?,
        // "ods" has no handler here (its arm was previously commented out),
        // so it is reported as an unknown extension as well.
        _ => return Err("unknown extension".into()),
    };

    Ok(text)
}
17 changes: 16 additions & 1 deletion core/src/extended/pdf.rs → core/src/extra/pdf.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
use crate::search::SearchSink;
use euclid::vec2;
use grep::{regex::RegexMatcher, searcher::Searcher};
use pdf_extract::{
encryption::DecryptionError, ConvertToFmt, Document, MediaBox, OutputDev, OutputError,
Transform,
};
use std::{error::Error, fmt::Write, panic::catch_unwind, path::Path};

pub fn extract_pdf(path: &Path) -> Result<String, Box<dyn Error>> {
/// File extensions (without the leading dot) associated with PDF documents.
pub static EXTENSIONS: &[&str] = &["pdf"];

/// Extracts the text of the PDF at `path` and runs `searcher` with `matcher`
/// over it, reporting into `sink`.
///
/// # Errors
///
/// Returns any error from text extraction or from the search itself.
pub fn process(
    searcher: &mut Searcher,
    matcher: &RegexMatcher,
    path: &Path,
    sink: &mut SearchSink,
) -> Result<(), Box<dyn Error>> {
    let contents = extract(path)?;
    let haystack = contents.as_bytes();

    searcher.search_slice(matcher, haystack, sink)?;
    Ok(())
}

fn extract(path: &Path) -> Result<String, Box<dyn Error>> {
let path = path.to_owned();
//because the library panics, we need to catch panics
let res = catch_unwind(|| extract_text(&path));
Expand Down
40 changes: 0 additions & 40 deletions core/src/fileinfo.rs

This file was deleted.

18 changes: 12 additions & 6 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
pub mod extended;
pub mod fileinfo;
pub mod manager;
pub mod options;
pub mod rgtools;
pub mod search;
mod engine;
mod result;
mod search;
mod utils;

pub mod extra;

pub use engine::SearchEngine;
pub use result::{Location, ResultEntry, SearchMessage, SearchResult};
pub use search::{SearchFlags, SearchParameters};

pub use grep::matcher::Match;
Loading

0 comments on commit dc54cc2

Please sign in to comment.