Skip to content

Commit

Permalink
Wrap custom IndexData
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr committed Jul 11, 2022
1 parent 854c75b commit 1e2f56b
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 11 deletions.
69 changes: 69 additions & 0 deletions libzim/libwrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ ObjWrapper::~ObjWrapper()
template<typename Output>
Output _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error);

// bool flavour of _callMethodOnObj: delegates to the Cython-exported
// bool_cy_call_fct, which reports failures through `error`.
template<>
bool _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  const bool pyResult = bool_cy_call_fct(obj, methodName, &error);
  return pyResult;
}

template<>
std::string _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
return string_cy_call_fct(obj, methodName, &error);
Expand All @@ -81,6 +86,11 @@ uint64_t _callMethodOnObj(PyObject *obj, const std::string& methodName, std::str
return uint64_cy_call_fct(obj, methodName, &error);
}

// uint32_t flavour of _callMethodOnObj: delegates to the Cython-exported
// uint32_cy_call_fct, which reports failures through `error`.
template<>
uint32_t _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  const uint32_t pyResult = uint32_cy_call_fct(obj, methodName, &error);
  return pyResult;
}

template<>
zim::Blob _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
return blob_cy_call_fct(obj, methodName, &error);
Expand All @@ -92,12 +102,24 @@ _callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& erro
return std::unique_ptr<zim::writer::ContentProvider>(contentprovider_cy_call_fct(obj, methodName, &error));
}

// IndexData flavour of _callMethodOnObj: the Cython helper hands back a raw
// pointer (possibly null), which is placed under shared_ptr ownership here.
template<>
std::shared_ptr<zim::writer::IndexData>
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  auto* rawIndexData = indexdata_cy_call_fct(obj, methodName, &error);
  return std::shared_ptr<zim::writer::IndexData>(rawIndexData);
}

// Hints flavour of _callMethodOnObj: delegates to the Cython-exported
// hints_cy_call_fct, which reports failures through `error`.
template<>
zim::writer::Hints
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  zim::writer::Hints hints = hints_cy_call_fct(obj, methodName, &error);
  return hints;
}

// GeoPosition flavour of _callMethodOnObj: delegates to the Cython-exported
// geoposition_cy_call_fct, which reports failures through `error`.
template<>
zim::writer::IndexData::GeoPosition
_callMethodOnObj(PyObject *obj, const std::string& methodName, std::string& error) {
  zim::writer::IndexData::GeoPosition position = geoposition_cy_call_fct(obj, methodName, &error);
  return position;
}

// This cpp function calls a Python method on a Python object.
// It checks that we are in a valid state and handles any potential error coming from Python.
template<typename Output>
Expand Down Expand Up @@ -130,6 +152,44 @@ zim::Blob ContentProviderWrapper::feed()
return callMethodOnObj<zim::Blob>(m_obj, "feed");
}


/*
################################
# Index Data Wrapper #
################################
*/

// Forwards to the Python object's `has_indexdata` method.
bool IndexDataWrapper::hasIndexData() const
{
  const bool hasData = callMethodOnObj<bool>(m_obj, "has_indexdata");
  return hasData;
}

// Forwards to the Python object's `get_title` method.
std::string IndexDataWrapper::getTitle() const
{
  std::string title = callMethodOnObj<std::string>(m_obj, "get_title");
  return title;
}

// Forwards to the Python object's `get_content` method.
std::string IndexDataWrapper::getContent() const
{
  std::string content = callMethodOnObj<std::string>(m_obj, "get_content");
  return content;
}

// Forwards to the Python object's `get_keywords` method.
std::string IndexDataWrapper::getKeywords() const
{
  std::string keywords = callMethodOnObj<std::string>(m_obj, "get_keywords");
  return keywords;
}

// Forwards to the Python object's `get_wordcount` method.
uint32_t IndexDataWrapper::getWordCount() const
{
  const uint32_t wordCount = callMethodOnObj<uint32_t>(m_obj, "get_wordcount");
  return wordCount;
}

// Forwards to the Python object's `get_geoposition` method.
zim::writer::IndexData::GeoPosition IndexDataWrapper::getGeoPosition() const
{
  using GeoPosition = zim::writer::IndexData::GeoPosition;
  GeoPosition position = callMethodOnObj<GeoPosition>(m_obj, "get_geoposition");
  return position;
}


/*
#########################
# WriterItem #
Expand Down Expand Up @@ -161,6 +221,15 @@ WriterItemWrapper::getContentProvider() const
return callMethodOnObj<std::unique_ptr<zim::writer::ContentProvider>>(m_obj, "get_contentprovider");
}

std::shared_ptr<zim::writer::IndexData>
WriterItemWrapper::getIndexData() const
{
if (!obj_has_attribute(m_obj, "get_indexdata")) {
return zim::writer::Item::getIndexData();
}
return callMethodOnObj<std::shared_ptr<zim::writer::IndexData>>(m_obj, "get_indexdata");
}

zim::writer::Hints WriterItemWrapper::getHints() const
{
return callMethodOnObj<zim::writer::Hints>(m_obj, "get_hints");
Expand Down
14 changes: 14 additions & 0 deletions libzim/libwrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ class WriterItemWrapper : public zim::writer::Item, private ObjWrapper
std::string getTitle() const override;
std::string getMimeType() const override;
std::unique_ptr<zim::writer::ContentProvider> getContentProvider() const override;
std::shared_ptr<zim::writer::IndexData> getIndexData() const override;
zim::writer::Hints getHints() const override;
};

Expand All @@ -317,6 +318,19 @@ class ContentProviderWrapper : public zim::writer::ContentProvider, private ObjW
zim::Blob feed() override;
};

// Adapter that exposes a Python index-data object through the C++
// zim::writer::IndexData interface. Each overridden accessor is implemented
// in libwrapper.cpp by calling the matching Python method on the wrapped
// object (has_indexdata, get_title, get_content, get_keywords,
// get_wordcount, get_geoposition).
// NOTE(review): lifetime/reference handling of the PyObject is presumably
// done by the private ObjWrapper base — confirm in ObjWrapper.
class IndexDataWrapper: public zim::writer::IndexData, private ObjWrapper
{
public:
// Forwards the Python object to the ObjWrapper base.
IndexDataWrapper(PyObject *obj) : ObjWrapper(obj) {};
~IndexDataWrapper() = default;
// Calls the Python method `has_indexdata`.
bool hasIndexData() const override;
// Calls the Python method `get_title`.
std::string getTitle() const override;
// Calls the Python method `get_content`.
std::string getContent() const override;
// Calls the Python method `get_keywords`.
std::string getKeywords() const override;
// Calls the Python method `get_wordcount`.
uint32_t getWordCount() const override;
// Calls the Python method `get_geoposition`.
IndexData::GeoPosition getGeoPosition() const override;
};


// Small helpers

Expand Down
96 changes: 85 additions & 11 deletions libzim/libzim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ import pathlib
import sys
import traceback
from types import ModuleType
from typing import Dict, Generator, Iterator, List, Set, Union
from typing import Dict, Generator, Iterator, List, Optional, Set, Tuple, Union
from uuid import UUID

from cpython.buffer cimport PyBUF_WRITABLE
from cpython.ref cimport PyObject

from cython.operator import preincrement

from libc.stdint cimport uint64_t
from libc.stdint cimport uint32_t, uint64_t
from libcpp cimport bool
from libcpp.map cimport map
from libcpp.memory cimport shared_ptr
Expand Down Expand Up @@ -87,6 +87,13 @@ cdef object call_method(object obj, string method):
# object to the correct cpp type.
# Will be used by cpp side to call python method.
cdef public api:
bool obj_has_attribute(object obj, string attribute) with gil:
    """Tell whether `obj` exposes a usable attribute named `attribute`.

    `attribute` is a UTF-8 encoded name coming from the C++ side.
    Returns True when the attribute exists and is not None; a missing
    attribute, or one explicitly set to None, yields False.
    """
    # Compare against None instead of relying on truthiness, so that an
    # attribute that exists but evaluates as falsy is still reported as present.
    return getattr(obj, attribute.decode('UTF-8'), None) is not None

string string_cy_call_fct(object obj, string method, string *error) with gil:
"""Lookup and execute a pure virtual method on object returning a string"""
try:
Expand Down Expand Up @@ -122,15 +129,27 @@ cdef public api:

return NULL

# currently have no virtual method returning a bool (was should_index/compress)
# bool bool_cy_call_fct(object obj, string method, string *error) with gil:
# """Lookup and execute a pure virtual method on object returning a bool"""
# try:
# func = getattr(obj, method.decode('UTF-8'))
# return func()
# except Exception as e:
# error[0] = traceback.format_exc().encode('UTF-8')
# return False
zim.IndexData* indexdata_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and wrap the returned Python index data for C++.

    Returns a newly allocated IndexDataWrapper around the Python result, or
    NULL when the method returned None. The C++ caller takes ownership of the
    pointer (libwrapper.cpp places it in a std::shared_ptr).
    On any Python exception the formatted traceback is written to `error`
    and NULL is returned.
    """
    try:
        indexData = call_method(obj, method)
        # Only None means "no index data": a truthiness test would also
        # discard a real object that happens to evaluate as falsy.
        if indexData is None:
            return NULL
        return new zim.IndexDataWrapper(<PyObject*>indexData)
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return NULL

bool bool_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and hand the result back to C++ as a bool.

    On any Python exception the formatted traceback is written to `error`
    and False is returned.
    """
    cdef bool outcome = False
    try:
        outcome = call_method(obj, method)
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')
    return outcome

uint64_t uint64_cy_call_fct(object obj, string method, string *error) with gil:
"""Lookup and execute a pure virtual method on object returning an uint64_t"""
Expand All @@ -141,6 +160,26 @@ cdef public api:

return 0

uint32_t uint32_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and hand the result back to C++ as a uint32_t.

    On any Python exception the formatted traceback is written to `error`
    and 0 is returned.
    """
    cdef uint32_t value = 0
    try:
        value = <uint32_t> call_method(obj, method)
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')
    return value

zim.GeoPosition geoposition_cy_call_fct(object obj, string method, string *error) with gil:
    """Call `method` on `obj` and convert the result to a C++ GeoPosition.

    A truthy result is indexed as `[0]` and `[1]` and returned as
    GeoPosition(True, first, second). A falsy result (e.g. None) gives
    GeoPosition(False, 0, 0). On any Python exception the formatted traceback
    is written to `error` and GeoPosition(False, 0, 0) is returned.
    """
    try:
        coordinates = call_method(obj, method)
        if not coordinates:
            return zim.GeoPosition(False, 0, 0)
        return zim.GeoPosition(True, coordinates[0], coordinates[1])
    except Exception:
        error[0] = traceback.format_exc().encode('UTF-8')

    return zim.GeoPosition(False, 0, 0)

map[zim.HintKeys, uint64_t] convertToCppHints(dict hintsDict):
"""C++ Hints from Python dict"""
cdef map[zim.HintKeys, uint64_t] ret;
Expand Down Expand Up @@ -440,6 +479,40 @@ class FileProvider(ContentProvider):
yield WritingBlob(res)
res = fh.read(bsize)

class IndexData:
    """Base class describing what should be indexed for an item.

    Subclass it, override the accessors below, and have
    Item.get_indexdata() return an instance of that subclass.
    """
    __module__ = writer_module_name

    def has_indexdata(self) -> bool:
        """Whether this object actually carries data to index.

        The stub answers False.
        """
        return False

    def get_title(self) -> str:
        """Title to index; it may or may not match Item.get_title."""
        raise NotImplementedError("get_title must be implemented.")

    def get_content(self) -> str:
        """Content to index."""
        raise NotImplementedError("get_content must be implemented.")

    def get_keywords(self) -> str:
        """Keywords to index, as one string of space-separated keywords."""
        raise NotImplementedError("get_keywords must be implemented.")

    def get_wordcount(self) -> int:
        """Number of words in the content."""
        raise NotImplementedError("get_wordcount must be implemented.")

    def get_geoposition(self) -> Optional[Tuple[float, float]]:
        """Geographic position to index.

        Either a (latitude, longitude) tuple or None; the stub returns None.
        """
        return None


class BaseWritingItem:
"""Item stub to override
Expand Down Expand Up @@ -530,6 +603,7 @@ writer_public_objects = [
ContentProvider,
FileProvider,
StringProvider,
IndexData,
pascalize
]
writer = create_module(writer_module_name, writer_module_doc, writer_public_objects)
Expand Down
10 changes: 10 additions & 0 deletions libzim/zim.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ cdef extern from "zim/writer/item.h" namespace "zim::writer":
COMPRESS
FRONT_ARTICLE

# Opaque declaration of the C++ IndexData class: no members are exposed to
# Cython, the type is only needed for pointers and as a base class.
cdef cppclass IndexData:
pass

# GeoPosition is declared inside the C++ namespace/class scope
# zim::writer::IndexData, hence the dedicated extern block.
cdef extern from "zim/writer/item.h" namespace "zim::writer::IndexData":
cppclass GeoPosition:
# Default constructor.
GeoPosition()
# libzim.pyx builds it as (has_position, tuple[0], tuple[1]); the Python
# side documents the tuple as (latitude, longitude).
GeoPosition(bool, double, double)

cdef extern from "zim/writer/contentProvider.h" namespace "zim::writer":
cdef cppclass ContentProvider:
pass
Expand Down Expand Up @@ -92,6 +100,8 @@ cdef extern from "libwrapper.h":
ContentProviderWrapper(PyObject* obj) except +
cdef cppclass WriterItemWrapper:
WriterItemWrapper(PyObject* obj) except +
# C++ adapter (declared in libwrapper.h) deriving from IndexData; built
# from a Python object. `except +` lets Cython turn a C++ exception thrown
# by the constructor into a Python exception.
cdef cppclass IndexDataWrapper(IndexData):
IndexDataWrapper(PyObject* obj) except +

Compression comp_from_int(int)

Expand Down
49 changes: 49 additions & 0 deletions tests/test_libzim_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Creator,
FileProvider,
Hint,
IndexData,
Item,
StringProvider,
)
Expand Down Expand Up @@ -639,6 +640,54 @@ def test_hints_values(fpath):
)


@pytest.mark.parametrize(
    "indexData, customContent, search_expected",
    [
        (None, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (False, "", [("standard", 1), ("home", 0), ("computer", 0)]),
        (True, "home", [("standard", 1), ("home", 1), ("computer", 0)]),
        (True, "computer", [("standard", 1), ("home", 0), ("computer", 1)]),
        (True, "standard", [("standard", 2), ("home", 0), ("computer", 0)]),
    ],
)
def test_custom_indexdata(
    fpath, lipsum_item, lipsum, indexData, customContent, search_expected
):
    """Check search match counts when an item supplies its own index data.

    `indexData` selects the scenario: None makes get_indexdata() return None,
    otherwise a custom IndexData is returned whose has_indexdata() answers
    `indexData` and whose content is `customContent`.
    """

    class CustomIndexData(IndexData):
        def has_indexdata(self):
            return indexData

        def get_title(self):
            return ""

        def get_content(self):
            return customContent

        def get_keywords(self):
            return ""

        def get_wordcount(self):
            return 1

    custom_item = StaticItem(
        path=HOME_PATH + "custom", content=lipsum, mimetype="text/html"
    )
    # Calling the class itself produces the instance handed to the indexer.
    custom_item.get_indexdata = (
        (lambda: None) if indexData is None else CustomIndexData
    )

    with Creator(fpath).config_indexing(True, "eng") as creator:
        creator.add_item(lipsum_item)
        creator.add_item(custom_item)

    searcher = Searcher(Archive(fpath))
    for term, expected_matches in search_expected:
        results = searcher.search(Query().set_query(term))
        assert results.getEstimatedMatches() == expected_matches


def test_reimpfeed(fpath):
class AContentProvider:
def __init__(self):
Expand Down

0 comments on commit 1e2f56b

Please sign in to comment.