Skip to content

Commit

Permalink
fix fts5 operation error, basic type detection
Browse files Browse the repository at this point in the history
- sqlite3.operationalerror fix
- basic type detection
- update readme
  • Loading branch information
Liblor committed Jun 24, 2022
1 parent 8a46a8b commit 38e996c
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 7 deletions.
103 changes: 101 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,101 @@
# knovleks
Personal Search Engine for different types of resources
# Knovleks

Personal Search Engine for different types of resources.

![Screenshot of Knovleks TUI](https://user-images.githubusercontent.com/4940804/175700234-41b43332-7031-4852-a397-d6af8a8577d2.png)

Knovleks can currently index websites, PDF files, and text notes.

- [Install](#install)
- [Usage](#usage)
  * [Index](#index)
  * [Search](#search)
  * [Tag filter](#tag-filter)
  * [TUI](#tui)
    + [Searchbar focused](#searchbar-focused)
    + [Results focused](#results-focused)

## Install

```
pip install knovleks
```

## Usage

```
Usage: knovleks [OPTIONS] COMMAND [ARGS]...
Options:
-h, --help Show this message and exit.
Commands:
index
search full-text search
tag-filter tag filter
tui terminal user interface (experimental)
```

### Index

```
Usage: knovleks index [OPTIONS] DOCUMENT
Options:
-t, --tag TEXT
--title TEXT
-d, --type, --document-type TEXT
-h, --help Show this message and exit.
```

### Search

```
Usage: knovleks search [OPTIONS] QUERY
full-text search
Options:
-t, --tag TEXT
-st, --show-tags
-l, --limit INTEGER
-dt, --doc-type TEXT
-ft, --full-text display full text
-h, --help Show this message and exit.
```

### Tag filter

```
Usage: knovleks tag-filter [OPTIONS] [TAG]...
tag filter
Options:
-st, --show-tags
-l, --limit INTEGER
-dt, --doc-type TEXT
-h, --help Show this message and exit.
```

### TUI

```
Switch focus: TAB
Next result: ctrl+j
Previous result: ctrl+k
Open result without closing: ctrl+l
```

#### Searchbar focused

```
Exit: ESC
```

#### Results focused

```
Switch focus to searchbar: ESC
Open result: Enter
```
21 changes: 20 additions & 1 deletion knovleks/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ def get_supported_document_types() -> Mapping[str, Type[IdocumentType]]:
}


def is_url(path: str) -> bool:
    """Report whether *path* begins with an HTTP or HTTPS scheme.

    The check is case-insensitive: the input is lower-cased before it is
    compared against the known prefixes.
    """
    # str.startswith accepts a tuple and returns True if any entry matches.
    return path.lower().startswith(("https://", "http://"))


def determine_doc_type(document: str) -> str:
    """Guess the document type to index *document* as.

    Returns:
        ``"website"`` when *document* is an HTTP(S) URL, ``"pdf"`` when it
        ends with a ``.pdf`` extension (in any letter case), and ``"note"``
        for everything else.
    """
    # TODO: determine doc type based on configuration file
    if is_url(document):
        return "website"
    # XXX: filetype shouldn't be determined based on extension
    # Compare case-insensitively so that e.g. "REPORT.PDF" is not
    # misclassified as a plain note.
    if document.lower().endswith(".pdf"):
        return "pdf"
    return "note"


@click.group(context_settings=dict(help_option_names=["-h", "--help"]))
@click.pass_context
def cli(ctx):
Expand All @@ -54,10 +71,12 @@ def cli(ctx):
@click.argument("document")
@click.option("-t", "--tag", multiple=True)
@click.option("--title", default="")
@click.option("-d", "--type", "--document-type", default="note")
@click.option("-d", "--type", "--document-type", default="auto")
@click.pass_obj
def index(knov: Knovleks, document: str, tag: Tuple[str],
title: str, type: str):
if type == "auto":
type = determine_doc_type(document)
knov.index_document(type, document, title, set(tag))


Expand Down
4 changes: 2 additions & 2 deletions knovleks/document_types/pdf_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ def parse(self):
@staticmethod
def open_doc(href, elem_idx):
    """Open *href* in zathura without waiting for the viewer to exit.

    Args:
        href: Location of the document handed to zathura as its file
            argument.
        elem_idx: Value passed to zathura's ``-P`` option (presumably the
            page to jump to -- confirm against the indexer).

    The child's stdin, stdout and stderr are all attached to the null
    device so the viewer cannot write into the calling terminal.
    """
    dn = subprocess.DEVNULL
    # Launch the viewer exactly once. The superseded variant of this call
    # (identical except for a redundant f"-P") must not be kept next to it,
    # otherwise every open would spawn two zathura processes.
    # NOTE(review): the viewer path is hard-coded; left unchanged here.
    subprocess.Popen(["/usr/bin/zathura", f"{href}", "-P", f"{elem_idx}"],
                     stdin=dn, stdout=dn, stderr=dn, close_fds=True)
13 changes: 12 additions & 1 deletion knovleks/knovleks.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ def _content_column_snippet(self,
(snip.left, snip.right, snip.trunc_text, f"{snip.token_nr}"))
return "snippet(doc_parts_fts, 0, ?, ?, ?, ?)"

def _quote_string(self, string: str) -> str:
string = string.replace('"', '""')
return f'"{string}"'

def search(self, search_query: str, tags: Set[str] = set(),
limit: Optional[int] = None,
doc_type: Optional[str] = None,
Expand All @@ -218,10 +222,17 @@ def search(self, search_query: str, tags: Set[str] = set(),
"WHERE dpf.rowid = dp.id AND dp.doc_id = d.id AND "
"dpf.doccontent MATCH ? ORDER BY rank")
parameters.append(search_query)
search_query_idx = len(parameters) - 1
if limit is not None:
parameters.append(f"{limit}")
query += " LIMIT ?"
yield from self.db_con.execute(query, parameters)
try:
# use fts syntax
yield from self.db_con.execute(query, parameters)
except sqlite3.OperationalError:
parameters[search_query_idx] = self._quote_string(search_query)
print(parameters[search_query_idx])
yield from self.db_con.execute(query, parameters)

def open_document(self, doc_type, href, elem_idx):
self.supported_types[doc_type].open_doc(href, elem_idx)
Expand Down
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
[metadata]
name = knovleks
version = 0.0.1
version = 0.0.2
author = Loris Reiff
author_email = [email protected]
license = Apache 2.0
long_description = file: README.md, LICENSE
long_description_content_type = text/markdown

[options]
packages = find:
Expand All @@ -23,6 +25,7 @@ ignore = E701,E731

[flake8]
ignore = E701,E731
per-file-ignores = __init__.py:F401
exclude = tests/context.py
statistics = true
show-source = true

0 comments on commit 38e996c

Please sign in to comment.