Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrap indexData #149

Merged
merged 4 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 70 additions & 1 deletion libzim/libwrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,24 @@ ObjWrapper::~ObjWrapper()
// Primary template: call the Python method `methodName` on `obj` and return
// its result converted to `Output`.
// Only declared here; each supported `Output` type has an explicit
// specialization that forwards to a typed `*_cy_call_fct` helper and passes
// `&error` so the helper can report a failure.
template<typename Output>
Output _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error);

// bool specialization: delegates to the `bool_cy_call_fct` helper, handing it
// the address of `error` so a failure can be reported back to the caller.
template<>
bool _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  const bool result = bool_cy_call_fct(obj, methodName, &error);
  return result;
}

// std::string specialization: delegates to the `string_cy_call_fct` helper,
// handing it the address of `error` so a failure can be reported back.
template<>
std::string _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  std::string result = string_cy_call_fct(obj, methodName, &error);
  return result;
}

// uint64_t specialization: delegates to the `uint64_cy_call_fct` helper,
// handing it the address of `error` so a failure can be reported back.
template<>
uint64_t _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  return uint64_cy_call_fct(obj, methodName, &error);
}

// uint32_t specialization: delegates to the `uint32_cy_call_fct` helper,
// handing it the address of `error` so a failure can be reported back.
template<>
uint32_t _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  const uint32_t result = uint32_cy_call_fct(obj, methodName, &error);
  return result;
}

template<>
Expand All @@ -92,12 +102,24 @@ _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& erro
return std::unique_ptr<zim::writer::ContentProvider>(contentprovider_cy_call_fct(obj, methodName, &error));
}

// shared_ptr<IndexData> specialization: obtains a raw pointer from the
// `indexdata_cy_call_fct` helper and hands it to a shared_ptr.
// The helper may return a null pointer (no index data, or an error was
// reported through `error`); the pointer is wrapped as-is in either case.
template<>
std::shared_ptr<zim::writer::IndexData>
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  auto* rawIndexData = indexdata_cy_call_fct(obj, methodName, &error);
  return std::shared_ptr<zim::writer::IndexData>(rawIndexData);
}

// Hints specialization: delegates to the `hints_cy_call_fct` helper,
// handing it the address of `error` so a failure can be reported back.
template<>
zim::writer::Hints
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  zim::writer::Hints hints = hints_cy_call_fct(obj, methodName, &error);
  return hints;
}

// GeoPosition specialization: delegates to the `geoposition_cy_call_fct`
// helper, handing it the address of `error` so a failure can be reported back.
template<>
zim::writer::IndexData::GeoPosition
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  zim::writer::IndexData::GeoPosition position = geoposition_cy_call_fct(obj, methodName, &error);
  return position;
}

// This C++ function calls a Python method on a Python object.
// It checks that we are in a valid state and handles any potential error coming from Python.
template<typename Output>
Expand Down Expand Up @@ -130,6 +152,44 @@ zim::Blob ContentProviderWrapper::feed()
return callMethodOnObj<zim::Blob>(m_obj, "feed");
}


/*
################################
# Index Data Wrapper #
################################
*/

// Returns the result of calling `has_indexdata` on the wrapped Python object.
bool IndexDataWrapper::hasIndexData() const
{
  const bool hasData = callMethodOnObj<bool>(m_obj, "has_indexdata");
  return hasData;
}

// Returns the result of calling `get_title` on the wrapped Python object.
std::string IndexDataWrapper::getTitle() const
{
  std::string title = callMethodOnObj<std::string>(m_obj, "get_title");
  return title;
}

// Returns the result of calling `get_content` on the wrapped Python object.
std::string IndexDataWrapper::getContent() const
{
  std::string content = callMethodOnObj<std::string>(m_obj, "get_content");
  return content;
}

// Returns the result of calling `get_keywords` on the wrapped Python object.
std::string IndexDataWrapper::getKeywords() const
{
  std::string keywords = callMethodOnObj<std::string>(m_obj, "get_keywords");
  return keywords;
}

// Returns the result of calling `get_wordcount` on the wrapped Python object.
uint32_t IndexDataWrapper::getWordCount() const
{
  const uint32_t wordCount = callMethodOnObj<std::uint32_t>(m_obj, "get_wordcount");
  return wordCount;
}

// Returns the result of calling `get_geoposition` on the wrapped Python object.
zim::writer::IndexData::GeoPosition IndexDataWrapper::getGeoPosition() const
{
  zim::writer::IndexData::GeoPosition position =
      callMethodOnObj<zim::writer::IndexData::GeoPosition>(m_obj, "get_geoposition");
  return position;
}


/*
#########################
# WriterItem #
Expand Down Expand Up @@ -161,6 +221,15 @@ WriterItemWrapper::getContentProvider() const
return callMethodOnObj<std::unique_ptr<zim::writer::ContentProvider>>(m_obj, "get_contentprovider");
}

// Index data for this item.
// When the wrapped Python object exposes a `get_indexdata` attribute, that
// method is called and its result returned; otherwise the base-class
// zim::writer::Item::getIndexData() implementation is used.
std::shared_ptr<zim::writer::IndexData>
WriterItemWrapper::getIndexData() const
{
  const bool hasPythonOverride = obj_has_attribute(m_obj, "get_indexdata");
  if (hasPythonOverride) {
    return callMethodOnObj<std::shared_ptr<zim::writer::IndexData>>(m_obj, "get_indexdata");
  }
  return zim::writer::Item::getIndexData();
}

zim::writer::Hints WriterItemWrapper::getHints() const
{
return callMethodOnObj<zim::writer::Hints>(m_obj, "get_hints");
Expand Down
14 changes: 14 additions & 0 deletions libzim/libwrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ class WriterItemWrapper : public zim::writer::Item, private ObjWrapper
std::string getTitle() const override;
std::string getMimeType() const override;
std::unique_ptr<zim::writer::ContentProvider> getContentProvider() const override;
std::shared_ptr<zim::writer::IndexData> getIndexData() const override;
zim::writer::Hints getHints() const override;
};

Expand All @@ -317,6 +318,19 @@ class ContentProviderWrapper : public zim::writer::ContentProvider, private ObjW
zim::Blob feed() override;
};

// Adapter that presents a Python object as a zim::writer::IndexData.
// Each override declared below is implemented in libwrapper.cpp by calling a
// Python method on the wrapped object (has_indexdata, get_title, get_content,
// get_keywords, get_wordcount, get_geoposition).
class IndexDataWrapper: public zim::writer::IndexData, private ObjWrapper
{
public:
// Wrap `obj`; the PyObject pointer is forwarded to the ObjWrapper base.
IndexDataWrapper(PyObject *obj) : ObjWrapper(obj) {};
~IndexDataWrapper() = default;
// Calls Python `has_indexdata`.
bool hasIndexData() const override;
// Calls Python `get_title`.
std::string getTitle() const override;
// Calls Python `get_content`.
std::string getContent() const override;
// Calls Python `get_keywords`.
std::string getKeywords() const override;
// Calls Python `get_wordcount`.
uint32_t getWordCount() const override;
// Calls Python `get_geoposition`.
IndexData::GeoPosition getGeoPosition() const override;
};


// Small helpers

Expand Down
99 changes: 85 additions & 14 deletions libzim/libzim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ import pathlib
import sys
import traceback
from types import ModuleType
from typing import Dict, Generator, Iterator, List, Set, Union
from typing import Dict, Generator, Iterator, List, Optional, Set, Tuple, Union
from uuid import UUID

from cpython.buffer cimport PyBUF_WRITABLE
from cpython.ref cimport PyObject

from cython.operator import preincrement

from libc.stdint cimport uint64_t
from libc.stdint cimport uint32_t, uint64_t
from libcpp cimport bool
from libcpp.map cimport map
from libcpp.memory cimport shared_ptr
Expand Down Expand Up @@ -87,6 +87,10 @@ cdef object call_method(object obj, string method):
# object to the correct cpp type.
# Will be used by cpp side to call python method.
cdef public api:
bool obj_has_attribute(object obj, string attribute) with gil:
    """Tell whether `obj` has an attribute named `attribute` (UTF-8 encoded name)"""
    attribute_name = attribute.decode('UTF-8')
    return hasattr(obj, attribute_name)

string string_cy_call_fct(object obj, string method, string *error) with gil:
"""Lookup and execute a pure virtual method on object returning a string"""
try:
Expand Down Expand Up @@ -122,25 +126,57 @@ cdef public api:

return NULL

# currently have no virtual method returning a bool (was should_index/compress)
# bool bool_cy_call_fct(object obj, string method, string *error) with gil:
# """Lookup and execute a pure virtual method on object returning a bool"""
# try:
# func = getattr(obj, method.decode('UTF-8'))
# return func()
# except Exception as e:
# error[0] = traceback.format_exc().encode('UTF-8')
# return False

uint64_t int_cy_call_fct(object obj, string method, string *error) with gil:
"""Lookup and execute a pure virtual method on object returning an int"""
zim.IndexData* indexdata_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and wrap the returned object as a C++ IndexData

    Returns a newly allocated IndexDataWrapper around the Python result, or
    NULL when the result is falsy (e.g. None). If the call raises, the
    formatted traceback is stored in `error` and NULL is returned."""
    try:
        py_index_data = call_method(obj, method)
        if py_index_data:
            return new zim.IndexDataWrapper(<PyObject*>py_index_data)
        # falsy result (e.g. None): nothing to wrap
        return NULL
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return NULL

bool bool_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and return its result as a bool

    If the call raises, the formatted traceback is stored in `error` and
    False is returned."""
    try:
        result = call_method(obj, method)
        return result
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return False

uint64_t uint64_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and return its result as a uint64_t

    If the call raises, the formatted traceback is stored in `error` and
    0 is returned."""
    try:
        value = call_method(obj, method)
        return <uint64_t> value
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return 0

uint32_t uint32_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and return its result as a uint32_t

    If the call raises, the formatted traceback is stored in `error` and
    0 is returned."""
    try:
        value = call_method(obj, method)
        return <uint32_t> value
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return 0

zim.GeoPosition geoposition_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and convert its result to a C++ GeoPosition

    A truthy result is indexed at [0] and [1] and returned as
    GeoPosition(True, result[0], result[1]). A falsy result (e.g. None)
    yields GeoPosition(False, 0, 0). If anything raises, the formatted
    traceback is stored in `error` and GeoPosition(False, 0, 0) is returned."""
    try:
        position = call_method(obj, method)
        if not position:
            return zim.GeoPosition(False, 0, 0)
        return zim.GeoPosition(True, position[0], position[1])
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return zim.GeoPosition(False, 0, 0)

map[zim.HintKeys, uint64_t] convertToCppHints(dict hintsDict):
"""C++ Hints from Python dict"""
cdef map[zim.HintKeys, uint64_t] ret;
Expand Down Expand Up @@ -439,6 +475,40 @@ class FileProvider(ContentProvider):
yield WritingBlob(res)
res = fh.read(bsize)

class IndexData:
    """Base class describing what should be indexed for an item.

    Subclass it, override the getters, and return an instance of the
    subclass from Item.get_indexdata()."""
    __module__ = writer_module_name

    def has_indexdata(self) -> bool:
        """Whether this IndexData actually holds data (False in this stub)"""
        return False

    def get_title(self) -> str:
        """Title to index; it may or may not match Item.get_title"""
        raise NotImplementedError("get_title must be implemented.")

    def get_content(self) -> str:
        """Content to index"""
        raise NotImplementedError("get_content must be implemented.")

    def get_keywords(self) -> str:
        """Keywords to index the item with.

        Expected to be a single string of space-separated keywords"""
        raise NotImplementedError("get_keywords must be implemented.")

    def get_wordcount(self) -> int:
        """Number of words in the content"""
        raise NotImplementedError("get_wordcount must be implemented.")

    def get_geoposition(self) -> Optional[Tuple[float, float]]:
        """Geographic position to index the item with.

        Either a (latitude, longitude) tuple or None (the default here)"""
        return None


class BaseWritingItem:
"""Item stub to override
Expand Down Expand Up @@ -529,6 +599,7 @@ writer_public_objects = [
ContentProvider,
FileProvider,
StringProvider,
IndexData,
pascalize
]
writer = create_module(writer_module_name, writer_module_doc, writer_public_objects)
Expand Down
10 changes: 10 additions & 0 deletions libzim/zim.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ cdef extern from "zim/writer/item.h" namespace "zim::writer":
COMPRESS
FRONT_ARTICLE

# Opaque declaration: no members of IndexData are exposed to Cython here.
cdef cppclass IndexData:
    pass

# GeoPosition is looked up in the "zim::writer::IndexData" namespace string,
# hence its own extern block.
cdef extern from "zim/writer/item.h" namespace "zim::writer::IndexData":
    cdef cppclass GeoPosition:
        GeoPosition()
        # NOTE(review): presumably (is_valid, latitude, longitude) — confirm against libzim
        GeoPosition(bool, double, double)

cdef extern from "zim/writer/contentProvider.h" namespace "zim::writer":
cdef cppclass ContentProvider:
pass
Expand Down Expand Up @@ -91,6 +99,8 @@ cdef extern from "libwrapper.h":
ContentProviderWrapper(PyObject* obj) except +
# C++ wrapper built around a Python object; `except +` lets a C++ exception
# thrown by the constructor surface as a Python exception.
cdef cppclass WriterItemWrapper:
    WriterItemWrapper(PyObject* obj) except +
# C++ wrapper built around a Python object, declared as deriving from
# IndexData; `except +` lets a C++ exception thrown by the constructor
# surface as a Python exception.
cdef cppclass IndexDataWrapper(IndexData):
    IndexDataWrapper(PyObject* obj) except +

Compression comp_from_int(int)

Expand Down
58 changes: 58 additions & 0 deletions tests/test_libzim_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Creator,
FileProvider,
Hint,
IndexData,
Item,
StringProvider,
)
Expand Down Expand Up @@ -639,6 +640,63 @@ def test_hints_values(fpath):
)


@pytest.mark.parametrize(
    "indexData, customContent, search_expected",
    [
        (None, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (False, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (True, "home", [("standard", 1), ("home", 1), ("computer", 0)]),
        (True, "computer", [("standard", 1), ("home", 0), ("computer", 1)]),
        (True, "standard", [("standard", 2), ("home", 0), ("computer", 0)]),
    ],
)
def test_custom_indexdata(
    fpath, lipsum_item, lipsum, indexData, customContent, search_expected
):
    """Search results must reflect the IndexData an item provides.

    `indexData` None makes get_indexdata() return None; otherwise it is the
    value has_indexdata() reports, with `customContent` as the indexed
    content. Each (query, count) pair in `search_expected` is checked against
    the estimated match count of the written archive."""
    custom_item = StaticItem(
        path=HOME_PATH + "custom", content=lipsum, mimetype="text/html"
    )

    if indexData is not None:

        class CustomIndexData(IndexData):
            def has_indexdata(self):
                return indexData

            def get_title(self):
                return ""

            def get_content(self):
                return customContent

            def get_keywords(self):
                return ""

            def get_wordcount(self):
                return 1

        # the class itself is the callable: calling it yields a fresh instance
        custom_item.get_indexdata = CustomIndexData
    else:
        custom_item.get_indexdata = lambda: None

    with Creator(fpath).config_indexing(True, "eng") as creator:
        creator.add_item(lipsum_item)
        creator.add_item(custom_item)

    archive = Archive(fpath)
    searcher = Searcher(archive)
    for search_query, expected in search_expected:
        search = searcher.search(Query().set_query(search_query))
        assert search.getEstimatedMatches() == expected


def test_indexdata_interface():
    """The stock IndexData reports no data, no geoposition, and leaves its
    title/content/keywords/wordcount getters unimplemented."""
    stub = IndexData()
    assert stub.has_indexdata() is False
    for method in ("title", "content", "keywords", "wordcount"):
        getter = getattr(stub, f"get_{method}")
        with pytest.raises(NotImplementedError):
            getter()
    assert stub.get_geoposition() is None


def test_reimpfeed(fpath):
class AContentProvider:
def __init__(self):
Expand Down