diff --git a/.gitmodules b/.gitmodules index 4f57be9e9c..e69de29bb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "modules/ppocr"] - path = modules/ppocr - url = ../ppocr.git diff --git a/jam/includes.jam b/jam/includes.jam index 40df3da516..3c7be43a23 100644 --- a/jam/includes.jam +++ b/jam/includes.jam @@ -3,7 +3,6 @@ for f in $(REDEMPTION_INCLUDE_PATH) $(SYSTEM_SRC_PATH) $(OCR1_INCLUDE_PATH) - $(MLN_INCLUDES_PATH) $(PPOCR_INCLUDES_PATH) ## configs @{ $(REDEMPTION_CONFIG_PATH)/redemption_src diff --git a/jam/redemption-config.jam b/jam/redemption-config.jam index 27c858d107..2889caf1a1 100644 --- a/jam/redemption-config.jam +++ b/jam/redemption-config.jam @@ -12,11 +12,10 @@ REDEMPTION_CONFIG_AUTOGEN_PATH ?= $(REDEMPTION_CONFIG_PATH)/autogen/include ; OCR1_INCLUDE_PATH ?= $(REDEMPTION_PUBLIC_PATH)/projects/ocr1/include ; -PPOCR_MODULE_PATH ?= $(REDEMPTION_MODULES_SRC_PATH)/ppocr ; -PPOCR_SRC_PATH ?= $(PPOCR_MODULE_PATH)/src ; +PPOCR_MODULE_PATH ?= $(REDEMPTION_PUBLIC_PATH)/projects/ppocr ; +PPOCR_SRC_PATH ?= $(PPOCR_MODULE_PATH) ; PPOCR_INCLUDES_PATH ?= $(PPOCR_SRC_PATH) ; -MLN_INCLUDES_PATH ?= $(PPOCR_MODULE_PATH)/includes/mln ; REDEMPTION_SYS_PATH ?= $(REDEMPTION_PUBLIC_PATH)/sys ; FIXTURES_PATH ?= $(REDEMPTION_PUBLIC_PATH)/tests/includes/fixtures ; diff --git a/jam/redemption-dependencies.jam b/jam/redemption-dependencies.jam index 6982f776c2..941c47e78f 100644 --- a/jam/redemption-dependencies.jam +++ b/jam/redemption-dependencies.jam @@ -40,17 +40,14 @@ constant LIB_DEPENDENCIES : release:-fvisibility=hidden shared -Wl,-Bsymbolic - # $(GCOV_NO_BUILD) ; EXE_DEPENDENCIES += app_path_exe.o log.o - $(GCOV_NO_BUILD) ; constant EXE_DEPENDENCIES_NO_SYSLOG : app_path_exe.o log_print.o - $(GCOV_NO_BUILD) ; diff --git a/modules/ppocr b/modules/ppocr deleted file mode 160000 index c359396bb4..0000000000 --- a/modules/ppocr +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c359396bb40837f8d9a16a501e1cca42a3729749 diff --git 
a/projects/ppocr/mln/core/contract.hh b/projects/ppocr/mln/core/contract.hh new file mode 100644 index 0000000000..c0842338cb --- /dev/null +++ b/projects/ppocr/mln/core/contract.hh @@ -0,0 +1,57 @@ +// Copyright (C) 2007, 2008, 2009 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_CORE_CONTRACT_HH +# define MLN_CORE_CONTRACT_HH + +/// \file +/// +/// Definition of the set of contracts. + +# include + +// Define a preprocessor constant HAS_NDEBUG reflecting the existence +// of NDEBUG, as advised by the section Conditional Compilation of the +// GNU Coding Standards +// (http://www.gnu.org/prep/standards/html_node/Conditional-Compilation.html). +#ifdef NDEBUG +#define HAS_NDEBUG 1 +#else +#define HAS_NDEBUG 0 +#endif + +/// Assertion (forwards to assert()). +# define mln_assertion(expr) assert(expr) + +/// Invariant (forwards to assert()). 
+# define mln_invariant(expr) assert(expr) + +/// Precondition (forwards to assert()). +# define mln_precondition(expr) assert(expr) + +/// Postcondition (forwards to assert()). +# define mln_postcondition(expr) assert(expr) + +#endif // ! MLN_CORE_CONTRACT_HH diff --git a/projects/ppocr/mln/core/def/coord.hh b/projects/ppocr/mln/core/def/coord.hh new file mode 100644 index 0000000000..8a0882b142 --- /dev/null +++ b/projects/ppocr/mln/core/def/coord.hh @@ -0,0 +1,51 @@ +// Copyright (C) 2008, 2009 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_CORE_DEF_COORD_HH +# define MLN_CORE_DEF_COORD_HH + +/*! \file + * + * \brief Definition of the default coordinate type. + */ + + +namespace mln +{ + + namespace def + { + + + /// Definition of the default coordinate type: 'short'. 
+ typedef short coord; + + + } // end of namespace mln::def + +} // end of namespace mln + + +#endif // ! MLN_CORE_DEF_COORD_HH diff --git a/projects/ppocr/mln/core/point.hh b/projects/ppocr/mln/core/point.hh new file mode 100644 index 0000000000..b7efb1990e --- /dev/null +++ b/projects/ppocr/mln/core/point.hh @@ -0,0 +1,152 @@ +// Copyright (C) 2007, 2008, 2009, 2010 EPITA Research and Development +// Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_coordORE_POINT_HH +# define MLN_coordORE_POINT_HH + +/// \file +/// +/// \brief Definition of the generic point2d class mln::point2d. +/// + +# include +# include + +# include +# include + +namespace mln +{ + + struct point2d + { + /// \var dim + /// Dimension of the space. + /// \invariant dim > 0 + enum { dim = 2 }; + + /// Coordinate associated type. 
+ typedef ::mln::def::coord coord; + + /// Read-only access to the \p i-th coordinate value. + /// \param[in] i The coordinate index. + /// \pre \p i < \c dim + const coord& operator[](unsigned i) const + { + mln_precondition(i < dim); + return this->coord_[i]; + } + + /// Read-write access to the \p i-th coordinate value. + /// \param[in] i The coordinate index. + /// \pre \p i < \c dim + coord& operator[](unsigned i) + { + mln_precondition(i < dim); + return this->coord_[i]; + } + + /// Constructor without argument; the coordinates are left uninitialized. + point2d() + { + } + + point2d(coord prow, coord pcol) + { + coord_[0] = prow; + coord_[1] = pcol; + } + + /// Point with all coordinates set to the maximum value. + static point2d plus_infty() + { + return point2d(std::numeric_limits::max(), std::numeric_limits::max()); + } + + /// Point with all coordinates set to the minimum value. + static point2d minus_infty() + { + return point2d(std::numeric_limits::min(), std::numeric_limits::min()); + } + + const coord& row() const + { + return this->coord_[0]; + } + + coord& row() + { + return this->coord_[0]; + } + + const coord& col() const + { + return this->coord_[1]; + } + + coord& col() + { + return this->coord_[1]; + } + + protected: + coord coord_[dim]; + }; + + /// Equality comparison between a couple of grid point2d \p lhs + /// and \p rhs. + /*! + * \param[in] lhs A first grid point2d. + * \param[in] rhs A second grid point2d. + * + * \pre Both \p lhs and \p rhs have to be defined on the same + * topology; otherwise this test does not compile. + * + * \return True if both grid point2ds have the same + * coordinates, otherwise false. + */ + inline + bool operator==(const point2d& lhs, const point2d& rhs) + { + return lhs.row() == rhs.row() && lhs.col() == rhs.col(); + } + + /// Print a grid point2d \p p into the output stream \p ostr. + /*! \param[in,out] ostr An output stream. + * \param[in] p A grid point2d. + * + * \return The modified output stream \p ostr. 
+ */ + inline + std::ostream& operator<<(std::ostream& ostr, const point2d& p) + { + return ostr << '(' << p[0] << ',' << p[1] << ')'; + } + +} // end of namespace mln + + +#endif // ! MLN_coordORE_POINT_HH diff --git a/projects/ppocr/mln/core/site_set/box.hh b/projects/ppocr/mln/core/site_set/box.hh new file mode 100644 index 0000000000..633074f85a --- /dev/null +++ b/projects/ppocr/mln/core/site_set/box.hh @@ -0,0 +1,210 @@ +// Copyright (C) 2007, 2008, 2009, 2010 EPITA Research and Development +// Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_CORE_SITE_SET_BOX_HH +# define MLN_CORE_SITE_SET_BOX_HH + +/// \file +/// +/// This file defines a generic box2d class. +/// +/// \todo Test if the safety code in box2d::box2d() is not too slow. + +# include +# include + +namespace mln +{ + + // Fwd decls. 
+ struct box2d_fwd_piter_; + + /// \brief Generic box2d class: site set containing points of a + /// regular grid. + /// + /// Parameter \c point2d is the corresponding type of point. + /// + /// \ingroup modsitesetbasic + // + struct box2d + { + /// Dimension. + enum { dim = point2d::dim }; + + /// Site associated type. + typedef point2d site; + + /// Forward Site_Iterator associated type. + typedef box2d_fwd_piter_ fwd_piter; + + /// Site_Iterator associated type. + typedef fwd_piter piter; + + /// Minimum point. + const point2d & pmin() const + { + return pmin_; + } + + /// Reference to the minimum point. + point2d & pmin() + { + return pmin_; + } + + /// Maximum point. + const point2d & pmax() const + { + return pmax_; + } + + /// Reference to the maximum point. + point2d & pmax() + { + return pmax_; + } + + /// Constructor without argument: pmin is set to plus_infty() and pmax to minus_infty(), so is_valid() is false. + box2d() + : pmin_(point2d::plus_infty()) + , pmax_(point2d::minus_infty()) + { + // FIXME: The code above can be slow; think about removing it... + } + + /// Constructor of a box2d going from \p point_min to \p point_max. + box2d(const site& point_min, const site& point_max) + : pmin_(point_min) + , pmax_(point_max) + { + mln_precondition(is_valid()); + } + + box2d(point2d::coord nbrows, point2d::coord nbcols) + : pmin_(0, 0) + , pmax_(--nbrows, --nbcols) + { + mln_precondition(nbrows != 0 && nbcols != 0); // NOTE(review): nbrows/nbcols were already decremented by the pmax_ initializer, so this rejects a 1-row or 1-column box rather than an empty one -- confirm intent. + mln_postcondition(is_valid()); + } + + /*! \brief Test if \p p belongs to the box2d. + * + * \param[in] p A point site. + */ + bool has(const point2d& p) const + { + mln_precondition(is_valid()); + return !(p[0] < pmin_[0] || p[0] > pmax_[0] || p[1] < pmin_[1] || p[1] > pmax_[1]); + } + + /// Test that the box2d owns valid data, i.e., is initialized and + /// with pmin being 'less-than' pmax. 
+ bool is_valid() const + { + // Validity is: for all i, pmin_[i] <= pmax_[i]. + // Nota bene: a one-point box2d is valid. 
+ return pmin_[0] <= pmax_[0] && pmin_[1] <= pmax_[1]; + } + + unsigned nrows() const + { + return this->len(0); + } + + point2d::coord min_row() const + { + return this->pmin()[0]; + } + + point2d::coord max_row() const + { + return this->pmax()[0]; + } + + unsigned ncols() const + { + return this->len(1); + } + + point2d::coord min_col() const + { + return this->pmin()[1]; + } + + point2d::coord max_col() const + { + return this->pmax()[1]; + } + + unsigned len(unsigned i) const + { + return this->is_valid() + ? 1 + this->pmax()[i] - this->pmin()[i] + : 0u; + } + + unsigned nsites() const + { + if (! this->is_valid()) + return 0; + return this->len(0) * this->len(1); + } + + protected: + + point2d pmin_, pmax_; + }; + + + inline bool operator==(const box2d& a, const box2d& b) + { + return a.min_row() == b.min_row() + && a.min_col() == b.min_col() + && a.max_row() == b.max_row() + && a.max_col() == b.max_col(); + } + + + /*! \brief Print a generic box2d \p b into the output stream \p ostr. + * + * \param[in,out] ostr An output stream. + * \param[in] b A generic box2d. + * + * \return The modified output stream \p ostr. + * + * \relates mln::box2d + */ + inline + std::ostream& operator<<(std::ostream& ostr, const box2d& b) + { + return ostr << "[" << b.pmin() << ".." << b.pmax() << ']'; + } + +} // end of namespace mln + + +#endif // ! MLN_CORE_SITE_SET_BOX_HH diff --git a/projects/ppocr/mln/core/site_set/box_piter.hh b/projects/ppocr/mln/core/site_set/box_piter.hh new file mode 100644 index 0000000000..301f7c15d8 --- /dev/null +++ b/projects/ppocr/mln/core/site_set/box_piter.hh @@ -0,0 +1,116 @@ +// Copyright (C) 2007, 2008, 2009 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. 
+// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_CORE_SITE_SET_BOX_PITER_HH +# define MLN_CORE_SITE_SET_BOX_PITER_HH + +/// \file +/// +/// Definition of iterators on points of boxes. + +# include + + +namespace mln +{ + + /// \brief A forward iterator on the points of a box2d, row by row. + /// + /// The iterator keeps its own copy of the box (member s_). + /// + /// \see mln::box2d + /// + class box2d_fwd_piter_ + { + public: + // Dimension, taken from point2d. + enum { dim = point2d::dim }; + + /// Constructor without argument. + box2d_fwd_piter_() + { + } + + /// Constructor. + /// + /// \param[in] b A box. + /// + box2d_fwd_piter_(const mln::box2d& b) + : s_(b) + { + } + + /// Test the iterator validity: false once the row equals the box's last row + 1 (the end sentinel set by next()). + bool is_valid() const + { + mln_precondition(s_.is_valid()); + return p_[0] != s_.pmax()[0] + 1; + } + + /// Start an iteration at the box's minimum point. + void start() + { + mln_precondition(s_.is_valid()); + p_ = s_.pmin(); + } + + /// Go to the next point: advance the column, wrapping to the next row at the end of a row. 
+ void next() + { + mln_precondition(is_valid()); + if (p_[1] != s_.pmax()[1]) + { + ++p_[1]; + return; + } + p_[1] = s_.pmin()[1]; + + if (p_[0] != s_.pmax()[0]) + { + ++p_[0]; + return; + } + p_[0] = static_cast(s_.pmax()[0] + 1); + } + + operator point2d const &() const + { return this->p_; } + + point2d::coord row() const + { return this->p_.row(); } + + point2d::coord col() const + { return this->p_.col(); } + + protected: + point2d p_; + box2d s_; + }; + + +} // end of namespace mln + + +#endif // ! MLN_CORE_SITE_SET_BOX_PITER_HH diff --git a/projects/ppocr/mln/image/image2d.hh b/projects/ppocr/mln/image/image2d.hh new file mode 100644 index 0000000000..51ce6977a0 --- /dev/null +++ b/projects/ppocr/mln/image/image2d.hh @@ -0,0 +1,246 @@ +// Copyright (C) 2008, 2009, 2010 EPITA Research and Development +// Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. 
This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_IMAGE_IMAGE2D_HH +# define MLN_IMAGE_IMAGE2D_HH + +#include +#include +#include +#include + +#include + +namespace mln { + + template + class image2d + { + struct data_type + { + static const unsigned bdr_ = 1; + T* buffer_ = nullptr; + std::size_t real_size = 0; + unsigned row_ = 0; + unsigned col_ = 0; + + data_type() = default; + + data_type(unsigned nrows, unsigned ncols) + : row_(nrows + this->bdr_ * 2) + , col_(ncols + this->bdr_ * 2) + { + this->allocate_(); + } + + ~data_type() + { + delete [] this->buffer_; + } + + operator bool () const + { + return this->buffer_; + } + + void allocate_() + { + this->real_size = this->row_ * this->col_; + this->buffer_ = new T[this->real_size]; + } + + void reallocate_(unsigned row, unsigned col) + { + this->row_ = row + this->bdr_ * 2; + this->col_ = col + this->bdr_ * 2; + + const std::size_t sz = this->row_ * this->col_; + + if (sz > this->real_size) { + delete [] this->buffer_; + this->buffer_ = nullptr; + this->real_size = sz; + this->buffer_ = new T[this->real_size]; + } + } + + int index(def::coord row, def::coord col) const + { + return (row + this->bdr_) * this->col_ + col + this->bdr_; + } + }; + + // Internal data: buffer and bordered dimensions (row_ and col_ include the border). + data_type data_; + + public: + /// Value associated type. + typedef T value; + + /// Site_Set associated type. + typedef box2d domain_t; + + /// Site associated type. + typedef box2d::site site; + + + /// Forward Site_Iterator associated type. + typedef box2d::fwd_piter fwd_piter; + + /// Site_Iterator associated type; default definition is + /// fwd_piter. + typedef fwd_piter piter; + + private: + image2d(const image2d&) /* = delete*/; + image2d& operator=(const image2d&) /* = delete*/; + + public: + /// Constructor without argument. 
+ image2d() + : data_() + {} + + /// Constructor with the numbers of rows and columns; the + /// border thickness is not a parameter (data_type::bdr_ is fixed to 1). + image2d(unsigned nbrows, unsigned nbcols) + : data_(nbrows, nbcols) + {} + + /// Initialize an empty image. + void init_(unsigned nbrows, unsigned nbcols) + { + assert(! this->is_valid()); + new (&this->data_) data_type(nbrows, nbcols); + } + + void realloc(unsigned nbrows, unsigned nbcols) + { + if (!this->data_) { + new (&this->data_) data_type(nbrows, nbcols); + } + else { + this->data_.reallocate_(nbrows, nbcols); + } + } + + /// Test if this image has been initialized; default impl. + bool is_valid() const + { + return this->data_; + } + + /// Test if \p p is valid. + bool has(const point2d& p) const + { + assert(this->is_valid()); + return box2d(this->data_.row_ - this->data_.bdr_*2, this->data_.col_ - this->data_.bdr_*2).has(p); + } + + /// Give the definition domain. + box2d domain() const + { + assert(this->is_valid()); + return box2d(this->nrows(), this->ncols()); + } + + /// Read-only access to the image value located at point \p p. + const T& operator()(const point2d& p) const + { + assert(this->has_with_border(p)); + return this->data_.buffer_[this->data_.index(p.row(), p.col())]; + } + + /// Read-write access to the image value located at point \p p. + T& operator()(const point2d& p) + { + assert(this->has_with_border(p)); + return this->data_.buffer_[this->data_.index(p.row(), p.col())]; + } + + // Specific methods: + // ----------------- + + /// Read-only access to the image value located at (\p row, \p col). + const T& at(def::coord row, def::coord col) const + { + assert(this->has_with_border(point2d(row, col))); + return this->data_.buffer_[this->data_.index(row, col)]; + } + + /// Read-write access to the image value located at (\p row, \p col). 
+ T& at(def::coord row, def::coord col) + { + assert(this->has_with_border(point2d(row, col))); + return this->data_.buffer_[this->data_.index(row, col)]; + } + + /// Give the number of rows. + unsigned nrows() const + { + assert(this->is_valid()); + return this->data_.row_ - this->data_.bdr_ * 2; + } + + /// Give the number of columns. + unsigned ncols() const + { + assert(this->is_valid()); + return this->data_.col_ - this->data_.bdr_ * 2; + } + + /// Give the border thickness. + unsigned border() const + { + assert(this->is_valid()); + return this->data_.bdr_; + } + + void fill_border(value x) + { + if (this->data_.col_ && this->data_.row_) { + value * eptr = this->data_.buffer_ + this->data_.col_ * this->data_.bdr_ - this->data_.bdr_; + std::fill(this->data_.buffer_, eptr, x); + const unsigned nr = this->nrows(); + for (unsigned r = 0; r < nr; ++r) { + eptr[0] = x; + eptr[1] = x; // NOTE(review): exactly two cells are written per row (end of one buffer row, start of the next), which matches bdr_ == 1 only. + eptr += this->data_.col_; + } + std::fill(eptr, eptr + this->data_.col_ * this->data_.bdr_ + this->data_.bdr_, x); + } + } + + private: + bool has_with_border(const point2d& p) const + { + assert(this->is_valid()); + return box2d(this->data_.row_, this->data_.col_).has(mln::point2d(p.row()+1, p.col()+1)); + } + }; +} + +#endif diff --git a/projects/ppocr/mln/io/pbm/load.hh b/projects/ppocr/mln/io/pbm/load.hh new file mode 100644 index 0000000000..cb2a792a73 --- /dev/null +++ b/projects/ppocr/mln/io/pbm/load.hh @@ -0,0 +1,146 @@ +// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. 
+// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_IO_PBM_LOAD_HH +#define MLN_IO_PBM_LOAD_HH + +/// \file +/// +/// Define a function which loads an image of kind pbm with +/// given path. + + +#include +#include +#include + +#include +#include + + +namespace mln +{ + + namespace io + { + + namespace pbm + { + + /// Load a pbm image in a Milena image. + /// + /// \param[out] ima A reference to the image2d which will receive + /// data. + /// \param[in] filename The source. + /// + bool load(image2d& ima, const std::string& filename); + + +# ifndef MLN_INCLUDE_ONLY + + namespace internal + { + + /// load_ascii: read one '0'/'1' character per pixel (P1 payload). + template + inline + void load_ascii(std::ifstream& file, image2d& ima) + { + typename image2d::fwd_piter p(ima.domain()); + for(p .start(); p .is_valid(); p .next()) + { + unsigned char value; + file >> value; + + mln_assertion(value == '0' || value == '1'); + ima(p) = (value == '0'); // In pbm, '0' means 'white' so 'object', thus 'true'! + } + } + + + /// load_raw_2d: read packed bits, most significant bit first (P4 payload). 
+ template + inline + void load_raw_2d(std::ifstream& file, image2d& ima) + { + typedef typename image2d::site::coord coord; + const coord min_row = 0; + const coord min_col = 0; + const coord max_row = ima.nrows(); // NOTE(review): nrows()/ncols() return unsigned and are narrowed to coord here -- confirm large images cannot reach this. + const coord max_col = ima.ncols(); + typename image2d::site p = point2d(0, min_col); + + char c = 0; + int i; + for (p.row() = min_row; p.row() < max_row; ++p.row()) + { + i = 0; + for (p.col() = min_col; p.col() < max_col; ++p.col()) + { + if (i % 8 == 0) + file.read(&c, 1); + ima(p) = !(c & 128); + c = static_cast(c * 2); + ++i; + } + } + } + + + } // end of namespace mln::io::pbm::internal + + + inline + bool load(image2d& ima, const std::string& filename) + { + std::ifstream file(filename.c_str()); + if (! file) + { + return false; + } + char type = 0; + int nrows, ncols; + if (!io::pnm::read_header('1', '4', file, type, nrows, ncols)) { + return false; + } + + ima.init_(nrows, ncols); + if (type == '4') + internal::load_raw_2d(file, ima); + else if (type == '1') + internal::load_ascii(file, ima); + return file.eof() == true || file.rdbuf()->sgetc() == std::ifstream::traits_type::eof(); + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace mln::io::pbm + + } // end of namespace mln::io + +} // end of namespace mln + + +#endif // ! MLN_IO_PBM_LOAD_HH diff --git a/projects/ppocr/mln/io/pnm/load.hh b/projects/ppocr/mln/io/pnm/load.hh new file mode 100644 index 0000000000..cec8b5cbca --- /dev/null +++ b/projects/ppocr/mln/io/pnm/load.hh @@ -0,0 +1,184 @@ +// Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. 
+// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see . +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_IO_PNM_LOAD_HH +#define MLN_IO_PNM_LOAD_HH + +/// \file +/// +/// Define a function which loads an image of kind PNM 8/16bits with +/// given path. 
+ +#include +#include +#include + +#include +#include + +#include +#include + +namespace mln +{ + + namespace io + { + + namespace pnm + { + + template + bool load_ascii_value(std::ifstream& file, I& ima); + + template + bool load_ascii_builtin(std::ifstream& file, I& ima); + + // used when (sizeof(int_u8) != 1): values are read one by one + template + inline + void load_raw_2d_uncontiguous(std::ifstream& file, image2d& ima) + { + const def::coord min_row = 0; + const def::coord min_col = 0; + const def::coord max_row = ima.nrows(); + const def::coord max_col = ima.ncols(); + + point2d p; + for (p.row() = min_row; p.row() < max_row; ++p.row()) + for (p.col() = min_col; p.col() < max_col; ++p.col()) + ::mln::trait::value_::read_value(file, ima(p)); + } + + // used in g++ > 2.95: each row is read with a single file.read() call + template + inline + void load_raw_2d_contiguous(std::ifstream& file, image2d& ima) + { + point2d p = point2d(0, ima.ncols() - 1); // NOTE(review): p starts at the last column, yet a full row of bytes (len) is read at &ima(p) below -- confirm the intended start column. + typedef typename image2d::site::coord coord; + const coord min_row = 0; + const coord max_row = ima.nrows(); + + std::size_t len = ima.ncols() * sizeof(V); + for (p.row() = min_row; p.row() < max_row; ++p.row()) + file.read(reinterpret_cast(&ima(p)), len); + } + + /// load_ascii for Milena value types. + template + inline + void load_ascii_value(std::ifstream& file, image2d& ima) + { + typename image2d::value::equiv c; + typename image2d::fwd_piter p(ima.domain()); + for (p .start(); p .is_valid(); p .next()) + { + file >> c; + ima(p) = c; + } + } + + /// load_ascii for builtin value types. + template + inline + void load_ascii_builtin(std::ifstream& file, image2d& ima) + { + typename image2d::fwd_piter p(ima.domain()); + + // FIXME: May be wrong! + // Worked out with an image with a max value of 255 + // loaded in an image2d. 
+ typename image2d::value n; + + for (p.start(); p.is_valid(); p.next()) + { + ::mln::trait::value_::read_value(file, n); + ima(p) = n; + } + } + + /// load_raw_2d. 
+ /// for all pnm 8/16 bits formats + template + inline + void load_raw_2d(std::ifstream& file, image2d& ima) + { + if (sizeof(V) == 1) + load_raw_2d_contiguous(file, ima); + else + load_raw_2d_uncontiguous(file, ima); + } + + /// Another way to load pnm files: + /// the destination is an argument to check if + /// the type matches the file to load. + template + inline + bool load(char type_, + image2d& ima, + const std::string& filename) + { + std::ifstream file(filename.c_str()); + if (! file) + { + return false; + } + + char type = 0; + int nrows, ncols; + unsigned int maxval; + if (!read_header(static_cast(type_ - 3), type_, file, type, + nrows, ncols, maxval)) { + return false; + } + + if (::mln::trait::value_::max() != maxval) + { + std::cerr << "error: file '" << filename + << "' cannot be loaded into this type of image\n" + << "input image have " << maxval + << " as maximum value while the destination's one is " + << ::mln::trait::value_::max() << "." + << std::endl; + return false; + } + + ima.init_(nrows, ncols); + if (type == type_) + load_raw_2d(file, ima); + else if (type == (type_ - 3)) + pnm::load_ascii_builtin(file, ima); + return file.eof() == true || file.rdbuf()->sgetc() == std::ifstream::traits_type::eof(); + } + + } // end of namespace mln::io::pnm + + } // end of namespace mln::io + +} // end of namespace mln + + +#endif // ! MLN_IO_PNM_LOAD_HH diff --git a/projects/ppocr/mln/io/pnm/load_header.hh b/projects/ppocr/mln/io/pnm/load_header.hh new file mode 100644 index 0000000000..6cafdf89a3 --- /dev/null +++ b/projects/ppocr/mln/io/pnm/load_header.hh @@ -0,0 +1,136 @@ +// Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. 
namespace mln
{

  namespace io
  {

    namespace pnm
    {

# ifndef MLN_INCLUDE_ONLY

      /// Parse a PNM header: magic "P<digit>", width, height and, for
      /// non-bitmap types, the maximum sample value.
      ///
      /// Header fields may be separated by any run of blanks and by
      /// '#' comment lines. The byte that ends the header (after the
      /// height for P1/P4, after maxval otherwise) must be a newline.
      ///
      /// \param[in,out] istr    Stream positioned on the magic number; left
      ///                        just after the header on success.
      /// \param[out]    type    Magic digit, '1'..'6'.
      /// \param[out]    nrows   Image height (second number of the header).
      /// \param[out]    ncols   Image width (first number of the header).
      /// \param[out]    maxval  Maximum sample value; left untouched for
      ///                        types '1' and '4'.
      /// \param[in]     test    When true, failures are not reported on
      ///                        std::cerr.
      /// \return true when the header is well formed.
      inline
      bool read_header(std::ifstream& istr,
                       char& type,
                       int& nrows, int& ncols,
                       unsigned int& maxval,
                       bool test = false)
      {
        const auto is_blank = [](int c) {
          return c == ' ' || c == '\t' || c == '\n'
              || c == '\r' || c == '\v' || c == '\f';
        };

        // Consume one mandatory blank, then any further blanks and
        // '#' comment lines.
        const auto skip_separator = [&istr, &is_blank]() -> bool {
          if (!is_blank(istr.get()))
            return false;
          for (;;)
          {
            const int c = istr.peek();
            if (c == '#')
            {
              std::string line;
              std::getline(istr, line);
            }
            else if (is_blank(c))
              istr.get();
            else
              return true;
          }
        };

        const auto parse = [&]() -> bool {
          // check magic
          if (istr.get() != 'P')
            return false;
          type = static_cast<char>(istr.get());
          if (type < '1' || type > '6')
            return false;
          if (!skip_separator())
            return false;

          // get size
          if (!(istr >> ncols >> nrows) || nrows <= 0 || ncols <= 0)
            return false;

          // bitmaps carry no maxvalue: the raster follows the newline
          if (type == '1' || type == '4')
            return istr.get() == '\n';

          // get maxvalue
          if (!skip_separator() || !(istr >> maxval))
            return false;
          return istr.get() == '\n';
        };

        if (parse())
          return true;

        if (! test)
        {
          std::cerr << "error: badly formed header!" << std::endl;
        }
        return false;
      }

      /// Parse a PNM header and check that its type is one of the two
      /// accepted magic digits.
      ///
      /// \param[in] ascii  Accepted ASCII magic digit.
      /// \param[in] raw    Accepted raw magic digit.
      /// \return true when the header is well formed and its type is
      ///         \p ascii or \p raw. A malformed header is reported once,
      ///         by the parsing overload; a well-formed header of another
      ///         type is reported here.
      inline
      bool read_header(char ascii, char raw,
                       std::ifstream& istr,
                       char& type,
                       int& nrows, int& ncols,
                       unsigned int& maxval)
      {
        if (!read_header(istr, type, nrows, ncols, maxval))
          return false;

        if (type == ascii || type == raw)
          return true;

        std::cerr << "error: bad pnm type; "
                  << "expected P" << ascii
                  << " or P" << raw
                  << ", get P" << type << "!" << std::endl;
        return false;
      }

      /// Same as above for callers that do not need the maximum value.
      inline
      bool read_header(char ascii, char raw,
                       std::ifstream& istr,
                       char& type,
                       int& nrows, int& ncols)
      {
        unsigned int maxval = 0;
        return read_header(ascii, raw, istr, type,
                           nrows, ncols, maxval);
      }

# endif // ! MLN_INCLUDE_ONLY

    } // end of namespace mln::io::pnm

  } // end of namespace mln::io

} // end of namespace mln
#ifndef MLN_IO_PNM_MACROS_HH
# define MLN_IO_PNM_MACROS_HH

/*! \file
 *
 * \brief Definition of pnm formats macros.
 *
 * Each macro is the digit that follows 'P' in the magic number of a PNM
 * file. For every format the raw digit is the ASCII digit plus 3.
 */

/// Portable Pixel Map Format (raw, then ASCII variant)
# define PPM '6'
# define PPM_ASCII '3'

/// Portable Gray Map Format (raw, then ASCII variant)
# define PGM '5'
# define PGM_ASCII '2'

/// Portable Bit Map Format (raw, then ASCII variant)
# define PBM '4'
# define PBM_ASCII '1'

#endif // ! MLN_IO_PNM_MACROS_HH
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_IO_PPM_LOAD_HH +#define MLN_IO_PPM_LOAD_HH + +/// \file +/// +/// Define a function which loads an image of kind ppm with +/// given path. + +#include + +#include +#include + + +namespace mln +{ + + namespace io + { + + namespace ppm + { + + /// Load a ppm image in a Milena image. + /// + /// \param[out] ima A reference to the image which will receive + /// data. + /// \param[in] filename The source. + template + bool load(image2d& ima, const std::string& filename) + { + return io::pnm::load(PPM, ima, filename); + } + + } // end of namespace mln::io::ppm + + } // end of namespace mln::io + +} // end of namespace mln + + +#endif // ! MLN_IO_PPM_LOAD_HH diff --git a/projects/ppocr/mln/trait_value_.hh b/projects/ppocr/mln/trait_value_.hh new file mode 100644 index 0000000000..8d27a09269 --- /dev/null +++ b/projects/ppocr/mln/trait_value_.hh @@ -0,0 +1,55 @@ +// Copyright (C) 2007, 2008, 2009 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. 
namespace mln
{

  namespace trait
  {

    /// Minimal value trait used by the PNM loader.
    ///
    /// \tparam V  Sample type; must be usable with std::numeric_limits and
    ///            with formatted stream extraction.
    template <typename V>
    struct value_ {
      /// Largest value representable by V.
      static V max() { return std::numeric_limits<V>::max(); }
      //static V min() { return std::numeric_limits<V>::min(); }

      /// Extract one value from \p is into \p v with operator>>.
      static void read_value(std::istream& is, V & v) { is >> v; }
    };


  } // end of namespace mln::trait

} // end of namespace mln
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/box_char/box.hpp" + +#include +// #include + +namespace ppocr { + +std::ostream & operator<<(std::ostream & os, Box const & box) { + return os << box.index() << ' ' << box.bounds(); +} + +} diff --git a/projects/ppocr/ppocr/box_char/box.hpp b/projects/ppocr/ppocr/box_char/box.hpp new file mode 100644 index 0000000000..3130106aa6 --- /dev/null +++ b/projects/ppocr/ppocr/box_char/box.hpp @@ -0,0 +1,73 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_BOX_HPP +#define PPOCR_BOX_HPP + +#include "ppocr/image/coordinate.hpp" + +#include + + +namespace ppocr { + +struct Box +{ + Box() = default; + + Box(Index const & idx, Bounds const & bnd) + : idx_(idx) + , bounds_(bnd) + {} + + Box(Bounds const & bnd) + : bounds_(bnd) + {} + + size_t x() const noexcept { return idx_.x(); } + size_t y() const noexcept { return idx_.y(); } + + size_t w() const noexcept { return bounds_.w(); } + size_t h() const noexcept { return bounds_.h(); } + + size_t bottom() const noexcept { return y()+h()-1; } + size_t right() const noexcept { return x()+w()-1; } + size_t left() const noexcept { return x(); } + size_t top() const noexcept { return y(); } + + Index const & index() const noexcept { return idx_; } + Bounds const & bounds() const noexcept { return bounds_; } + + explicit operator bool() const noexcept { return w() && h(); } + + bool operator == (Box const & other) const noexcept + { return this->bounds_ == other.bounds_; } + bool operator != (Box const & other) const noexcept + { return !(*this == other); } + +private: + Index idx_; + Bounds bounds_; +}; + +std::ostream & operator<<(std::ostream & os, Box const & box); +std::istream & operator>>(std::istream & is, Box & box); + +} + +#endif diff --git a/projects/ppocr/ppocr/box_char/make_box_character.cpp b/projects/ppocr/ppocr/box_char/make_box_character.cpp new file mode 100644 index 0000000000..92c9a9daea --- /dev/null +++ b/projects/ppocr/ppocr/box_char/make_box_character.cpp @@ -0,0 +1,105 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the 
namespace ppocr {

namespace utils {
    /// Returns true when none of the \p w consecutive pixels starting at
    /// \p d satisfies is_pix_letter.
    bool horizontal_empty(Pixel const * d, size_t w) {
        for (auto e = d+w; d != e; ++d) {
            if (is_pix_letter(*d)) {
                return false;
            }
        }
        return true;
    }

    /// Returns true when no pixel satisfies is_pix_letter among those
    /// visited from \p d in steps of bnd.w(), stopping at d + bnd.area().
    bool vertical_empty(Pixel const * d, Bounds const & bnd) {
        for (auto e = d + bnd.area(); d != e; d += bnd.w()) {
            if (is_pix_letter(*d)) {
                return false;
            }
        }
        return true;
    }
}


/// Computes the box of the first character found when scanning \p image
/// from \p idx.
///
/// All pointer steps use bnd.w() as the distance between two rows.
/// NOTE(review): this presumably requires bnd.w() to be the row stride of
/// \p image -- confirm against callers.
///
/// \return a Box whose index is the first non-empty column/row found and
///         whose bounds are the width and height measured below.
Box make_box_character(Image const & image, Index const & idx, Bounds const & bnd)
{
    size_t x = idx.x();

    // Skip the leading columns that contain no letter pixel.
    auto d = image.data({x, idx.y()});
    for (; x < bnd.w(); ++x) {
        if (!utils::vertical_empty(d, bnd)) {
            break;
        }
        ++d;
    }

    size_t w = x;

    // Extend to the right for as long as the current column is connected to
    // the next one. In the lambda, `w` is the row step (bnd.w()), not the
    // outer `w`. The lambda returns false as soon as a letter pixel of the
    // column at `d` has a letter pixel at d+1, d-w+1 or d+w+1 (the same row,
    // the previous row or the next row of the following column), and true
    // when no such pair exists, which ends the character.
    while (w + 1 < bnd.w()) {
        ++w;
        if ([&image](Pixel const * d, size_t w, size_t h) -> bool {
            for (auto e = d+w*h; d != e; d += w) {
                if (is_pix_letter(*d) && (
                    (d+1 != e && is_pix_letter(*(d+1)))
                 || (d-w+1 >= image.data() && is_pix_letter(*(d-w+1)))
                 || (d+w+1 < e && is_pix_letter(*(d+w+1)))
                )) {
                    return false;
                }
            }
            return true;
        }(d, bnd.w(), bnd.h())) {
            break;
        }
        ++d;
    }
    // `w` was an end column so far; turn it into a width.
    w -= x;

    size_t y = idx.y();

    // Skip the leading rows that are empty over the character's width.
    d = image.data({x, y});
    for (; y < bnd.h(); ++y) {
        if (!utils::horizontal_empty(d, w)) {
            break;
        }
        d += bnd.w();
    }

    size_t h = bnd.h();

    // Walk upwards from row bnd.h() until a non-empty row is found or row
    // `y` is reached.
    d = image.data({x, h});
    while (--h > y) {
        d -= bnd.w();
        if (!utils::horizontal_empty(d, w)) {
            break;
        }
    }
    // `h` is the index of the last kept row; turn it into a height.
    h -= y;

    ++h;
    return {{x, y}, {w, h}};
}

}
namespace ppocr { namespace container {

/// Prefix tree built once from a sorted range of sequences.
///
/// Each level is a vector of nodes ordered by value; every node owns the
/// trie of its children.
///
/// \tparam T  Element type of the stored sequences (e.g. char).
template<class T>
struct trie
{
    class node_type
    {
        using value_type = T;
        using values_ = std::vector<node_type>;

        friend struct trie;

        value_type value_;
        bool is_terminal_;
        trie nodes_;

    public:
        using iterator = typename values_::const_iterator;

        template<class U>
        node_type(U && x, bool is_terminal = false)
        : value_(std::forward<U>(x))
        , is_terminal_(is_terminal)
        {}

        iterator begin() const { return this->nodes_.begin(); }
        iterator end() const { return this->nodes_.end(); }

        /// First child whose value is not less than \p x.
        template<class U>
        iterator lower_bound(U const & x) const { return this->nodes_.lower_bound(x); }

        trie const & childrens() const { return this->nodes_; }

        /// True when a stored sequence ends on this node.
        bool is_terminal() const { return this->is_terminal_; }
        std::size_t size() const { return this->nodes_.size(); }
        bool empty() const { return this->nodes_.empty(); }
        value_type const & get() const { return this->value_; }
    };

private:
    using values_ = std::vector<node_type>;

public:
    using value_type = T;
    using iterator = typename values_::const_iterator;

    trie() = default;

    /// \pre [first, last) is a sorted range
    template<class RandIt>
    trie(RandIt first, RandIt last) {
        this->insert_after(first, last);
    }

    /// Appends the sequences of [first, last), which must share their first
    /// \p depth elements, below this level.
    ///
    /// \pre [first, last) is sorted.
    /// NOTE(review): every sequence is indexed at \p depth before its size
    /// is checked, so sequences presumably have to be longer than \p depth
    /// (no empty sequence) -- confirm against callers.
    template<class RandIt>
    void insert_after(RandIt first, RandIt last, unsigned depth = 0u) {
        while (first != last) {
            using value_iterator = typename std::iterator_traits<RandIt>::value_type;
            // End of the run of sequences sharing (*first)[depth].
            auto middle = std::upper_bound(
                first, last, (*first)[depth],
                [depth](T const & c, value_iterator const & s){
                    return depth < s.size() && c < s[depth];
                }
            );
            this->nodes_.emplace_back((*first)[depth], depth + 1 == first->size());
            if (first->size() == depth + 1) {
                // This sequence ends here; only the longer ones go deeper.
                ++first;
            }
            this->trie_back_().insert_after(first, middle, depth+1);
            first = middle;
        }
    }

    iterator begin() const { return this->nodes_.begin(); }
    iterator end() const { return this->nodes_.end(); }
    std::size_t size() const { return this->nodes_.size(); }
    bool empty() const { return this->nodes_.empty(); }

    /// First node of this level whose value is not less than \p x.
    template<class U>
    iterator lower_bound(U const & x) const {
        return std::lower_bound(this->begin(), this->end(), x, [](node_type const & node, U const & key) {
            return node.get() < key;
        });
    }

private:
    trie & trie_back_() { return this->nodes_.back().nodes_; }

    values_ nodes_;
};


/// Trie stored in one contiguous array, level after level.
///
/// Each node records the offset from itself to its first child and the
/// number of its children. The offset stored in the first node is also the
/// number of root nodes.
template<class T>
struct flat_trie
{
    struct node_type;

    using iterator = node_type const *;

    /// Half-open span of sibling nodes.
    struct range {
        range(node_type const * first, node_type const * last)
        : beg_(first)
        , end_(last)
        {}

        iterator begin() const { return this->beg_; }
        iterator end() const { return this->end_; }
        std::size_t size() const { return this->end() - this->begin(); }
        bool empty() const { return this->beg_ == this->end_; }

        /// First node of the span whose value is not less than \p x.
        template<class U>
        iterator lower_bound(U const & x) const {
            return std::lower_bound(this->begin(), this->end(), x, [](node_type const & node, U const & key) {
                return node.get() < key;
            });
        }

    private:
        node_type const * beg_;
        node_type const * end_;
    };

    struct node_type {
        using size_type = unsigned; // TODO depends on T
        using value_type = T;

        static_assert(~size_type{} >= ~typename std::make_unsigned<T>::type(), "unimplemented");

        /// Copies value, child count and terminal flag of a trie node; the
        /// child offset is filled in by flat_trie.
        node_type(typename trie<T>::node_type const & node)
        : count_(node.size())
        , is_terminal_(node.is_terminal())
        , x_(node.get())
        {}

        node_type(value_type x, size_type pos, size_type sz, bool is_terminal)
        : pos_(pos)
        , count_(sz)
        , is_terminal_(is_terminal)
        , x_(x)
        {}

        range childrens() const { return {this->begin(), this->end()}; }
        // Children are addressed relative to this node's own address.
        iterator begin() const { return this + this->pos_; }
        iterator end() const { return this->begin() + this->size(); }

        template<class U>
        iterator lower_bound(U const & x) const { return this->childrens().lower_bound(x); }

        bool empty() const { return this->count_ == 0; }
        std::size_t size() const { return this->count_; }
        std::size_t relative_pos() const { return this->pos_; }
        bool is_terminal() const { return this->is_terminal_; }
        value_type const & get() const { return x_; }

    private:
        size_type pos_ = 0;
        size_type count_;
        bool is_terminal_;
        value_type x_;

        friend struct flat_trie;
    };

    using value_type = T;

    flat_trie() = default;

    /// Adopts an already flattened node sequence.
    template<class FwIt>
    flat_trie(FwIt first, FwIt last)
    : elems_(first, last)
    {}

    flat_trie(std::vector<node_type> && nodes)
    : elems_(std::move(nodes))
    {}

    flat_trie(std::vector<node_type> const & nodes)
    : elems_(nodes)
    {}

    /// Flattens \p x breadth first: the root nodes come first, then the
    /// children of each node in the order the nodes were stored.
    flat_trie(trie<T> const & x)
    : elems_(x.begin(), x.end())
    {
        using trie_ref = std::reference_wrapper<trie<T> const>;

        // Queue the child tries of every node of `level`, in node order.
        auto const enqueue_children = [](std::vector<trie_ref> & queue, trie<T> const & level) {
            for (auto const & node : level) {
                queue.emplace_back(node.childrens());
            }
        };

        std::vector<trie_ref> current;
        std::vector<trie_ref> next;
        enqueue_children(current, x);

        // `pos` is the index of the node whose children are `t`; nodes and
        // queued tries advance in lockstep.
        std::size_t pos = 0;
        while (!current.empty()) {
            for (trie<T> const & t : current) {
                elems_[pos].pos_ = static_cast<typename node_type::size_type>(elems_.size() - pos);
                elems_.insert(elems_.end(), t.begin(), t.end());
                enqueue_children(next, t);
                ++pos;
            }
            current.swap(next);
            next.clear();
        }

        this->elems_.shrink_to_fit();
    }

    /// Root nodes; an empty range when the trie holds no node.
    range childrens() const
    {
        node_type const * const first = this->elems_.data();
        std::size_t const root_count =
            this->elems_.empty() ? 0u : this->elems_.front().relative_pos();
        return {first, first + root_count};
    }

    node_type const & operator[](std::size_t const & i) const { return this->elems_[i]; }

    std::vector<node_type> const & all() const { return this->elems_; }

private:
    std::vector<node_type> elems_;
};

} }
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_DEFINED_LOADER_HPP +#define PPOCR_SRC_DEFINED_LOADER_HPP + +#include "ppocr/loader2/datas_loader.hpp" + +#include "ppocr/strategies/alternation.hpp" +// #include "ppocr/strategies/direction.hpp" +#include "ppocr/strategies/hdirection.hpp" +#include "ppocr/strategies/hdirection2.hpp" +// #include "ppocr/strategies/agravity.hpp" +// #include "ppocr/strategies/gravity.hpp" +// #include "ppocr/strategies/compass.hpp" +// #include "ppocr/strategies/dcompass.hpp" +#include "ppocr/strategies/proportionality.hpp" +// #include "ppocr/strategies/gravity2.hpp" +#include "ppocr/strategies/hgravity.hpp" +#include "ppocr/strategies/hgravity2.hpp" + +#include "ppocr/strategies/dvdirection.hpp" +#include "ppocr/strategies/dvdirection2.hpp" +#include "ppocr/strategies/dvgravity.hpp" +#include "ppocr/strategies/dvgravity2.hpp" + +#include "ppocr/strategies/dzdensity.hpp" + +#include "ppocr/strategies/density.hpp" + +#include "ppocr/strategies/zone.hpp" +#include "ppocr/strategies/proportionality_zone.hpp" + +#include "ppocr/strategies/hbar.hpp" + +#include +#include + +namespace ppocr { + +#define REGISTRY(name) \ + loader2::Strategy +#define REGISTRY2(name) \ + loader2::Strategy, \ + loader2::Strategy + +template +struct mpl_strategies_list_t +{ static constexpr std::size_t size = sizeof...(Strategies); }; + +namespace details_ { + template + struct pp_ocr_merge_strategies; + + template + struct pp_ocr_merge_strategies, mpl_strategies_list_t> + { using type = mpl_strategies_list_t; }; + + template + struct pp_ocr_to_datas; + + template + struct DefaultDatas : loader2::Datas + { using loader2::Datas::Datas; }; + + template + struct pp_ocr_to_datas> + { using type = DefaultDatas; }; +} + +#ifdef IN_IDE_PARSER +using PpOcrDatas = loader2::Datas< 
/// Functor that writes one data set per line on a stream.
///
/// Every element of `data.data()` is written followed by a space, and the
/// line is ended with '\n'. A std::vector element is written as its size
/// followed by its items; a std::array element as its items only. Nothing
/// is written once the stream is in a failed state.
struct WriteApplyData {
    std::ostream & os_;

    template<class Data>
    void operator()(Data const & data) const {
        if (!this->os_) {
            return;
        }

        for (auto & x : data.data()) {
            this->write(x);
        }
        this->os_ << '\n';
    }

private:
    // Single value.
    template<class T>
    void write(T const & x) const {
        this->os_ << x << ' ';
    }

    // Variable-length value: the size comes first so it can be read back.
    template<class T>
    void write(std::vector<T> const & cont) const {
        this->os_ << cont.size() << ' ';
        for (auto & x : cont) {
            this->os_ << x << ' ';
        }
    }

    // Fixed-length value: the size is known from the type.
    template<class T, std::size_t N>
    void write(std::array<T, N> const & arr) const {
        for (auto & x : arr) {
            this->os_ << x << ' ';
        }
    }
};
other) const { + typename loader2::Data::container_type cont; + + if (this->is_) { + cont.resize(this->data_sz_); + for (auto & x : cont) { + this->read(x); + } + } + + data = loader2::Data(std::move(cont)); + + (*this)(other...); + } + + private: + template + void read(T & x) const { + this->is_ >> x; + } + + template + void read(std::vector & cont) const { + std::size_t sz; + if (this->is_ >> sz) { + cont.resize(sz); + for (auto & x : cont) { + this->is_ >> x; + } + } + } + + template + void read(std::array & arr) const { + for (auto & x : arr) { + this->is_ >> x; + } + } + }; +} + +namespace details_ { + template + std::ostream & operator<<(std::ostream & os, DefaultDatas const & datas) { + os << datas.size() << '\n'; + loader2::apply_from_datas(datas, details_::WriteApplyData{os}); + return os; + } + + template + std::istream & operator>>(std::istream & is, DefaultDatas & datas) { + std::size_t sz; + is >> sz; + using loader2::Data; + struct Tuple : Data... {} t; + details_::ReadApplyData read{is, sz}; + read(static_cast&>(t)...); + datas = PpOcrDatas(static_cast&&>(t)...); + return is; + } +} + +} + +#undef REGISTRY2 +#undef REGISTRY + +#endif diff --git a/projects/ppocr/ppocr/filters/best_baseline.hpp b/projects/ppocr/ppocr/filters/best_baseline.hpp new file mode 100644 index 0000000000..0c9daff416 --- /dev/null +++ b/projects/ppocr/ppocr/filters/best_baseline.hpp @@ -0,0 +1,50 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
namespace ppocr { namespace filters {

using std::size_t;

/// Returns the value that occurs most often in [first, last).
///
/// When several values share the highest count, the smallest of them is
/// returned (values are counted in an ordered map and the first maximum
/// wins). For an empty range the result is `~value_type{}`.
///
/// \tparam FwIt  Forward iterator over an ordered, default-constructible
///               value type supporting operator~.
template<class FwIt>
typename std::iterator_traits<FwIt>::value_type
best_baseline(FwIt first, FwIt last)
{
    using value_type = typename std::iterator_traits<FwIt>::value_type;
    if (first == last) {
        return ~value_type{};
    }
    std::map<value_type, size_t> ys;
    for (; first != last; ++first) {
        ++ys[*first];
    }
    using cP = std::pair<value_type const, size_t> const;
    return std::max_element(ys.begin(), ys.end(), [](cP & a, cP & b) {
        return a.second < b.second;
    })->first;
}

} }
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/image/coordinate.hpp" + +#include +#include + +namespace ppocr { + +std::ostream & operator<<(std::ostream & os, Index const & idx) { + return os << idx.x() << ' ' << idx.y(); +} + +std::ostream & operator<<(std::ostream & os, Bounds const & bnd) { + return os << bnd.w() << ' ' << bnd.h(); +} + +std::istream & operator>>(std::istream & is, Index & idx) { + size_t x, y; + if (is >> x >> y) { + idx = Index(x, y); + } + return is; +} + +std::istream & operator>>(std::istream & is, Bounds & bnd) { + size_t w, h; + if (is >> w >> h) { + bnd = Bounds(w, h); + } + return is; +} + +} diff --git a/projects/ppocr/ppocr/image/coordinate.hpp b/projects/ppocr/ppocr/image/coordinate.hpp new file mode 100644 index 0000000000..fef721ab5f --- /dev/null +++ b/projects/ppocr/ppocr/image/coordinate.hpp @@ -0,0 +1,84 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
#ifndef PPOCR_COORDINATE_HPP
#define PPOCR_COORDINATE_HPP

#include <iosfwd>
#include <cstddef>


namespace ppocr {

using std::size_t;

/// Position (column \c x, row \c y) inside an image. Defaults to (0, 0).
struct Index {
    Index(size_t x, size_t y) noexcept
    : x_(x)
    , y_(y)
    {}

    Index() = default;

    size_t x() const noexcept { return x_; }
    size_t y() const noexcept { return y_; }

    bool operator == (Index const & other) const noexcept
    { return this->x_ == other.x_ && this->y_ == other.y_; }
    bool operator != (Index const & other) const noexcept
    { return !(*this == other); }

private:
    size_t x_ = 0;
    size_t y_ = 0;
};

/// Dimensions (width \c w, height \c h) of an image or of a section of it.
/// Defaults to an empty 0x0 area.
struct Bounds {
    Bounds(size_t w, size_t h) noexcept
    : w_(w)
    , h_(h)
    {}

    Bounds() = default;

    /// Number of cells, i.e. w * h.
    size_t area() const noexcept { return w_ * h_; }
    size_t w() const noexcept { return w_; }
    size_t h() const noexcept { return h_; }

    /// \return true when \a idx designates a cell inside these bounds.
    /// (Previously declared as returning size_t; the value was always 0 or 1,
    /// so callers relying on the integral result keep working.)
    bool contains(Index const & idx) const noexcept
    { return idx.x() < w() && idx.y() < h(); }

    bool operator == (Bounds const & other) const noexcept
    { return this->w_ == other.w_ && this->h_ == other.h_; }
    bool operator != (Bounds const & other) const noexcept
    { return !(*this == other); }

private:
    size_t w_ = 0;
    size_t h_ = 0;
};


// Text (de)serialization as two space-separated integers.
std::ostream & operator<<(std::ostream & os, Index const & idx);
std::istream & operator>>(std::istream & is, Index & idx);
std::ostream & operator<<(std::ostream & os, Bounds const & bnd);
std::istream & operator>>(std::istream & is, Bounds & bnd);

}

#endif
Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/image/image.hpp" +#include + +#include + +namespace ppocr { + +namespace { + using cP = Pixel const *; + using P = Pixel *; +} + +Image::Image(Bounds const& bounds, PtrImageData data) +: data_(std::move(data)) +, bounds_(bounds) +{} + +void section(Image const & from, Pixel * data, Index const & idx, Bounds const & bnd) +{ + cP d = from.data(idx); + for (size_t y = 0; y != bnd.h(); ++y) { + data = std::copy(d, d+bnd.w(), data); + d += from.width(); + } +} + +Image Image::section(Index const& section_idx, Bounds const& section_bnd) const +{ + assert(bounds_.contains(section_idx)); + assert(section_bnd.w() + section_idx.x() <= width() && section_bnd.h() + section_idx.y() <= height()); + PtrImageData data(new Pixel[section_bnd.area()]); + ::ppocr::section(*this, data.get(), section_idx, section_bnd); + return {{section_bnd.w(), section_bnd.h()}, std::move(data)}; +} + +void rotate90(Image const & from, Pixel * data) +{ + for (size_t x = from.width(); x; ) { + --x; + for (cP d = from.data() + x, e = d + from.area(); d != e; d += from.width()) { + *data++ = *d; + } + } +} + +Image Image::rotate90() const +{ + PtrImageData data(new Pixel[this->area()]); + ::ppocr::rotate90(*this, data.get()); + return {{this->height(), this->width()}, std::move(data)}; +} + +Image Image::clone() const +{ + return this->section({}, this->bounds()); 
+} + +bool operator==(const Image& a, const Image& b) +{ + return a.width() == b.width() + && a.height() == b.height() + && std::equal(a.data(), a.data_end(), b.data()); +} + +std::ostream & operator<<(std::ostream & os, Image const & image) +{ + os.fill(':'); + os.width(image.width()+3); + os << ":\n"; + cP p = image.data_.get(); + for (size_t h = 0; h != image.height(); ++h) { + os << ':'; + os.write(p, image.width()); + os << ":\n"; + p += image.width(); + } + os.width(image.width()+3); + os << ":\n"; + return os; +} + +} diff --git a/projects/ppocr/ppocr/image/image.hpp b/projects/ppocr/ppocr/image/image.hpp new file mode 100644 index 0000000000..bdde370e2a --- /dev/null +++ b/projects/ppocr/ppocr/image/image.hpp @@ -0,0 +1,229 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_IMAGE_HPP +#define PPOCR_IMAGE_HPP + +#include "ppocr/image/coordinate.hpp" +#include "pixel.hpp" + +#include + + +namespace ppocr { + +template +struct HorizontalRange; + +using PtrImageData = std::unique_ptr; + +struct Image +{ + Image() = default; + + Image(Bounds const & bounds, PtrImageData data); + + size_t width() const noexcept { return bounds_.w(); } + size_t height() const noexcept { return bounds_.h(); } + + Bounds const & bounds() const noexcept { return bounds_; } + size_t area() const noexcept { return bounds_.area(); } + + Image section(Index const & section_idx, Bounds const & section_bnd) const; + + Image rotate90() const; + + Image clone() const; + + Pixel operator[](Index const & idx) const noexcept + { return data()[to_size_t(idx)]; } + + Pixel const * data() const noexcept { return this->data_.get(); } + + Pixel const * data(Index const & idx) const noexcept + { return data() + to_size_t(idx); } + + Pixel const * data_end() const noexcept + { return data() + width() * height(); } + + size_t to_size_t(Index const & idx) const noexcept + { return idx.y() * this->width() + idx.x(); } + + explicit operator bool () const noexcept { return bool(this->data_); } + PtrImageData release() { return std::move(data_); } + + friend std::ostream & operator<<(std::ostream &, Image const &); + +private: + PtrImageData data_; + Bounds bounds_; + + template + friend struct HorizontalRange; +}; + + +void rotate90(Image const & from, Pixel * data); +void section(Image const & from, Pixel * data, Index const & idx, Bounds const & bnd); + +bool operator == (Image const &, Image const &); +inline bool operator != (Image const & a, Image const & b) { return !(a == b); } + + +struct NormalPixelGet { + constexpr NormalPixelGet() noexcept 
{} + + bool operator()(Pixel const * p) const + { return is_pix_letter(*p); } +}; + +template +struct HorizontalRange +{ + struct iterator + { + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = bool; + using reference = bool; + using pointer = void; + + bool operator<(iterator const & other) const { return data_ < other.data_; } + bool operator==(iterator const & other) const { return data_ == other.data_; } + bool operator!=(iterator const & other) const { return !(*this == other); } + + bool operator*() const { return r_.pixel_get_(data_); } + + bool operator-(iterator const & other) const { return data_ - other.data_; } + + iterator & operator++() { ++data_; return *this; } + + Pixel const * base() const noexcept { return data_; } + + private: + HorizontalRange const & r_; + Pixel const * data_; + + friend struct HorizontalRange; + + iterator(HorizontalRange const & r, Pixel const * data) + : r_(r) + , data_(data) + {} + }; + + HorizontalRange(Image const & img, Index idx, size_t w, PixelGetter pixel_get) + : pixel_get_(pixel_get) + , w_(w) + , data_(img.data(idx)) + {} + + iterator begin() const { return {*this, data_}; } + iterator end() const { return {*this, data_ + w_}; } + size_t size() const { return w_; } + +private: + PixelGetter pixel_get_; + size_t w_; + Pixel const * data_; +}; + + +template +HorizontalRange hrange(Image const & img, Index pos, size_t w, PixelGetter pixel_get) +{ return {img, pos, w, pixel_get}; } + +inline HorizontalRange hrange(Image const & img, Index pos, size_t w) +{ return {img, pos, w, NormalPixelGet()}; } + + +struct AnyPixelGet +{ + constexpr AnyPixelGet(size_t h, size_t step) noexcept + : h_(h) + , step_(step) + {} + + bool operator()(Pixel const * p) const + { + for (Pixel const * e = p + h_*step_; p != e; p += step_) { + if (is_pix_letter(*p)) { + return true; + } + } + return false; + } + +private: + size_t h_; + size_t step_; +}; + +inline 
HorizontalRange hrange(Image const & img, Index pos, Bounds bounds) +{ return {img, pos, bounds.w(), AnyPixelGet(bounds.h(), img.width())}; } + + +namespace rng +{ + template + bool any(HorizontalRange const & range) + { + for (bool is : range) { + if (is) { + return true; + } + } + return false; + } + + template + bool all(HorizontalRange const & range) + { + for (bool is : range) { + if (!is) { + return false; + } + } + return true; + } + + template + bool none(HorizontalRange const & range) + { + for (bool is : range) { + if (is) { + return false; + } + } + return true; + } + + template + bool next_alternation(Iterator & it, Iterator last) + { + bool x = *it; + while (it != last && *it == x) { + ++it; + } + return it != last; + } +} + +} + +#endif diff --git a/projects/ppocr/ppocr/image/image_from_file.cpp b/projects/ppocr/ppocr/image/image_from_file.cpp new file mode 100644 index 0000000000..616d1cf6d1 --- /dev/null +++ b/projects/ppocr/ppocr/image/image_from_file.cpp @@ -0,0 +1,297 @@ +/* +SPDX-FileCopyrightText: 2024 Wallix Proxies Team +SPDX-License-Identifier: LGPL-2.1-or-later +*/ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "ppocr/image/image_from_file.hpp" + + +namespace +{ + struct Buffer + { + Buffer(std::size_t len) + : len(len) + { + data = static_cast(operator new(len)); + } + + Buffer(Buffer&& other) + : data(std::exchange(other.data, nullptr)) + , len(std::exchange(other.len, 0)) + {} + + Buffer(Buffer const&) = delete; + Buffer& operator=(Buffer const&) = delete; + + ~Buffer() + { + operator delete(data); + } + + uint8_t* data = nullptr; + std::size_t len = 0; + }; + + struct File + { + File(char const* filename) + : fd(open(filename, O_RDONLY)) + {} + + ~File() + { + close(fd); + } + + int fd; + }; + + [[noreturn]] + void throw_error(const char * filename) + { + std::string desc = "image_from_file: "; + desc += filename; + if (int errnum = errno) { + desc += ": "; + desc += 
strerror(errnum); + } + throw std::runtime_error(std::move(desc)); + } + + Buffer read_all(const char * filename) + { + File file(filename); + + struct stat st; + if (file.fd < 0) { + throw_error(filename); + } + + if (int err = fstat(file.fd, &st)) { + throw_error(filename); + } + + if (st.st_size < 0) { + errno = 0; + throw_error(filename); + } + + Buffer buf(static_cast(st.st_size)); + std::size_t remaining = buf.len; + auto* p = buf.data; + while (ssize_t res = read(file.fd, p, remaining)) { + if (res <= 0) { + if (res == 0) { + buf.data -= remaining; + return 0; + } + throw_error(filename); + } + remaining -= static_cast(res); + p += res; + } + + return buf; + } + + struct PnmHeader + { + uint8_t type; + unsigned width; + unsigned height; + unsigned max_value; + uint8_t const* end_ptr; + + bool parse(uint8_t const* p, uint8_t const* end) + { + // pnm format: + // 'P' ['1'-'6'] '\n' # type + // ('#'...'\n')* # comments + // ['0'-'9']+ ' ' ['0'-'9']+ '\n' # width height + // ['0'-'9']+ '\n' # max value (when type != 1 or 4) + // data + + if (end - p < 9) { + return false; + } + + type = p[1]; + + // check magic ('P1'..'P6') + if (p[0] != 'P' || (type < '1' || type > '6') || p[2] != '\n') { + return false; + } + + p += 3; + + // skip comments ('#'...'\n') + while (p < end && *p == '#') { + for (;;) { + ++p; + if (p == end) { + break; + } + + if (*p == '\n') { + ++p; + break; + } + } + } + + auto as_chars = [](uint8_t const* p) { return reinterpret_cast(p); }; + auto as_bytes = [](char const* p) { return reinterpret_cast(p); }; + + auto consume_uint = [&](unsigned& n, uint8_t next_ch) { + auto res1 = std::from_chars(as_chars(p), as_chars(end), n); + if (res1.ec != std::errc{}) { + return false; + } + p = as_bytes(res1.ptr); + + if (p == end || *p != next_ch) { + return false; + } + + ++p; + + return true; + }; + + // get size ( ' ' ) + if (!consume_uint(width, ' ') || !consume_uint(height, '\n')) { + return false; + } + + // max value + max_value = 0; + if (type 
!= '1' && type != '4') { + if (!consume_uint(max_value, '\n')) { + return false; + } + } + + end_ptr = p; + return true; + } + }; +} // anonymous namespace + +namespace ppocr { + +Image image_from_file(const char * filename, unsigned luminance) +{ + Buffer buf = read_all(filename); + + PnmHeader header; + uint8_t const* end = buf.data + buf.len; + if (!header.parse(buf.data, end)) { + throw std::runtime_error("image_from_file: badly formed header"); + } + + constexpr char const* unsupported_max_value + = "image_from_file: unsupported format: maximum value is 1"; + + constexpr char const* invalid_data_len + = "image_from_file: invalid data len"; + + std::size_t img_size = header.width * header.height; + + if (!img_size) { + throw std::runtime_error("image_from_file: empty image"); + } + + PtrImageData vimg(new Pixel[img_size]); + uint8_t const* p = header.end_ptr; + char* output = vimg.get(); + + // binary data (pnm) + if (header.type == '6') { + if (static_cast(end - p) != img_size * 3) { + throw std::runtime_error(invalid_data_len); + } + + for (; p < end; p += 3) { + unsigned char c + = ((511/*PPM_RED_WEIGHT*/ * p[0] + 511) >> 10) + + ((396/*PPM_GREEN_WEIGHT*/ * p[1] + 511) >> 10) + + ((117/*PPM_BLUE_WEIGHT*/ * p[2] + 511) >> 10); + *output++ = (c < luminance) ? '-' : 'x'; + } + } + // binary data (pbm) + else if (header.type == '4') { + if (static_cast(end - p) != (header.width + 7) / 8 * header.height) { + throw std::runtime_error(invalid_data_len); + } + + for (unsigned row = 0; row < header.height; ++row) { + int i = 0; + unsigned c = 0; + for (unsigned col = 0; col < header.width; ++col) { + if (i % 8 == 0) { + c = *p++; + } + *output++ = (c & 128) ? 
'-' : 'x'; + c <<= 1; + ++i; + } + } + } + // ascii data (pnm and pbm) + else if (header.type == '3' || header.type == '1') { + std::size_t compunt_counter = 0; + for (; p < end; ++p) { + if (*p == ' ') { + // ignored + } + else if (*p == '0') { + *output++ = '-'; + if (++compunt_counter == img_size) { + break; + } + } + else if (*p == '1') { + *output++ = 'x'; + if (++compunt_counter == img_size) { + break; + } + } + else if (*p == '\n') { + // ignored + } + else { + throw std::runtime_error(unsupported_max_value); + } + } + + // skip blank + for (; p < end && (*p == ' ' || *p == '\n'); ++p) { + } + + if (p != end) { + throw std::runtime_error(invalid_data_len); + } + } + else { + throw std::runtime_error("image_from_file: unsupported format (expected 1, 3, 4 and 6)"); + } + + return Image(Bounds(header.width, header.height), std::move(vimg)); +} + +} diff --git a/projects/ppocr/ppocr/image/image_from_file.hpp b/projects/ppocr/ppocr/image/image_from_file.hpp new file mode 100644 index 0000000000..2388f80961 --- /dev/null +++ b/projects/ppocr/ppocr/image/image_from_file.hpp @@ -0,0 +1,30 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_IMAGE_IMAGE_FROM_FILE_HPP +#define PPOCR_SRC_IMAGE_IMAGE_FROM_FILE_HPP + +#include "ppocr/image/image.hpp" + +namespace ppocr { + +Image image_from_file(const char * filename, unsigned luminance = 128); + +} + +#endif diff --git a/projects/ppocr/ppocr/image/image_from_string.cpp b/projects/ppocr/ppocr/image/image_from_string.cpp new file mode 100644 index 0000000000..25275b146d --- /dev/null +++ b/projects/ppocr/ppocr/image/image_from_string.cpp @@ -0,0 +1,37 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/image/image_from_string.hpp" + +#include + +namespace ppocr { + +Image image_from_string(const Bounds& bnd, const char * pix_data) +{ + assert(pix_data[bnd.area()] == 0 && (pix_data[bnd.area()-1] == '-' || pix_data[bnd.area()-1] == 'x')); + + return Image(bnd, [pix_data, &bnd]{ + size_t const area = bnd.area(); + PtrImageData data(new Pixel[area]); + std::copy(pix_data, pix_data+area, data.get()); + return data; + }()); +} + +} diff --git a/projects/ppocr/ppocr/image/image_from_string.hpp b/projects/ppocr/ppocr/image/image_from_string.hpp new file mode 100644 index 0000000000..d3f1445189 --- /dev/null +++ b/projects/ppocr/ppocr/image/image_from_string.hpp @@ -0,0 +1,31 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_IMAGE_FROM_STRING_HPP +#define PPOCR_IMAGE_FROM_STRING_HPP + +#include "ppocr/image/coordinate.hpp" +#include "ppocr/image/image.hpp" + +namespace ppocr { + +Image image_from_string(Bounds const & bnd, const char * data); + +} + +#endif diff --git a/projects/ppocr/ppocr/image/pixel.hpp b/projects/ppocr/ppocr/image/pixel.hpp new file mode 100644 index 0000000000..335f882db7 --- /dev/null +++ b/projects/ppocr/ppocr/image/pixel.hpp @@ -0,0 +1,38 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
#ifndef PPOCR_SRC_IMAGE_PIXEL_HPP
#define PPOCR_SRC_IMAGE_PIXEL_HPP

namespace ppocr {

/// A pixel is stored as a character; 'x' marks a pixel of a letter.
using Pixel = char;

/// \return true when \a pix is a letter pixel ('x').
constexpr bool is_pix_letter(Pixel pix) noexcept
{ return pix == 'x'; }

/// Function object equivalent of is_pix_letter(), usable as a predicate
/// with algorithms (and, like the function, in constant expressions).
struct is_pix_letter_fn {
    constexpr is_pix_letter_fn() noexcept {}

    constexpr bool operator()(Pixel pix) const noexcept
    { return is_pix_letter(pix); }
};

}

#endif
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_LOADER2_DATAS_LOADER_HPP +#define PPOCR_SRC_LOADER2_DATAS_LOADER_HPP + +#include +#include +#include + +#include "ppocr/image/image.hpp" +#include + +namespace ppocr { + +class Image; + +namespace loader2 { + +using std::size_t; + +enum class PolicyLoader { img, img90 }; + +template +struct Strategy +{ + using strategy_type = Strategy_; + constexpr static PolicyLoader policy = Policy; +}; + +namespace details_ { + template + struct first_type + { using type = T; }; + + + struct Lt { template bool operator()(T const & a, T const & b) const { return a < b; } }; + struct Eq { template bool operator()(T const & a, T const & b) const { return a == b; } }; + struct Noop { template bool operator()(T const & , T const & ) const { return false; } }; + + template + bool cmp_datas(size_t i1, size_t i2, Cmp cmp, Cmp2, Data const & data) { + return cmp(data[i1], data[i2]); + } + + template + bool cmp_datas(size_t i1, size_t i2, Cmp cmp, Cmp2 cmp2, Data const & data, Datas const & ... others) { + if (cmp(data[i1], data[i2]) || cmp2(data[i1], data[i2])) { + return cmp_datas(i1, i2, cmp, cmp2, others...); + } + return false; + } + + template + constexpr std::integral_constant + is_contiguous(int) { return {}; } + + template + constexpr std::false_type + is_contiguous(unsigned) { return {}; } +} + +template +typename Strategy_::value_type +load(Strategy_ const & strategy, PolicyLoader policy, Image const & img, Image const & img90) +{ return policy == PolicyLoader::img ? 
strategy.load(img, img90) : strategy.load(img90, img); } + +template +struct Data +{ + using strategy_type = typename Strategy::strategy_type; + using value_type = typename strategy_type::value_type; + using relationship_type = typename strategy_type::relationship_type; + + using container_type = std::vector; + using iterator = typename container_type::const_iterator; + using const_iterator = iterator; + + using is_contiguous = decltype(details_::is_contiguous(1)); + + Data() = default; + + explicit Data(container_type && cont) noexcept + : data_(std::move(cont)) + {} + + explicit Data(container_type const & cont) + : data_(cont) + {} + + container_type release() noexcept { + return std::move(this->data_.values); + } + + void load(Image const & img, Image const & img90) { + this->data_.values.push_back(::ppocr::loader2::load( + get_strategy(), + Strategy::policy, img, img90) + ); + } + + value_type const & operator[](size_t i) const { + return data_.values[i]; + } + + relationship_type const & get_relationship() const { + return static_cast(data_); + } + + strategy_type const & get_strategy() const { + return static_cast(this->data_); + } + + typename relationship_type::result_type + relationship(value_type const & a, value_type const & b) const { + return get_relationship()(a, b); + } + + double dist(value_type const & a, value_type const & b) const { + double const ret = get_relationship().dist(a, b); + assert(0. <= ret && ret <= 1.); + return ret; + } + + std::size_t count_posibilities() const { + return get_relationship().count(); + } + + std::size_t size() const noexcept { + return this->data_.values.size(); + } + + container_type const & data() const noexcept { + return this->data_.values; + } + + iterator begin() const { return this->data().begin(); } + iterator end() const { return this->data().end(); } + +private: + struct impl : strategy_type, relationship_type /*empty class optimization*/ { + container_type values; + + template + impl(Args && ... 
args) + : relationship_type(static_cast(*this).relationship()) + , values(std::forward(args)...) + {} + } data_; +}; + +template +struct Datas : private Data... +{ + Datas() = default; + + explicit Datas(Data && ... datas) + : Data(std::move(datas))... + {} + + template + Data const & get() const noexcept { + return static_cast const &>(*this); + } + + template + Data & get() noexcept { + return static_cast &>(*this); + } + + std::size_t size() const noexcept { + return this->get::type>().size(); + } + + std::size_t release() const noexcept { + return this->get::type>().size(); + } + + void load(Image const & img) { + auto img90 = img.rotate90(); + (void(std::initializer_list{ + (static_cast&>(*this).load(img, img90), char())... + })); + } + + bool lt(size_t i1, size_t i2) const { + return details_::cmp_datas(i1, i2, details_::Lt(), details_::Eq(), get()...); + } + + bool eq(size_t i1, size_t i2) const { + return details_::cmp_datas(i1, i2, details_::Eq(), details_::Noop(), get()...); + } +}; + + +template +Data const & get_data(Datas const & datas) { + return datas.template get(); +} + +template +void apply_from_datas(Datas const & datas, Fn fn) { + (void)std::initializer_list{((void)(fn(datas.template get())), char())...}; +} + +template +void apply_from_datas(Datas & datas, Fn fn) { + (void)std::initializer_list{((void)(fn(datas.template get())), char())...}; +} + +} } + +#endif diff --git a/projects/ppocr/ppocr/loader2/glyphs_loader.cpp b/projects/ppocr/ppocr/loader2/glyphs_loader.cpp new file mode 100644 index 0000000000..8db5621e8c --- /dev/null +++ b/projects/ppocr/ppocr/loader2/glyphs_loader.cpp @@ -0,0 +1,85 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/loader2/glyphs_loader.hpp" +#include "ppocr/loader2/image_io.hpp" + +#include +#include + +namespace ppocr { namespace loader2 { + +std::istream & operator>>(std::istream& is, View & view) { + return is >> view.word >> view.font >> view.info_line; +} + +std::istream & operator>>(std::istream& is, Views & views) { + unsigned n; + if (!(is >> n)) { + return is; + } + unsigned i = 0; + views.resize(n); + while (i < n && (is >> views[i])) { + ++i; + } + return is; +} + +std::istream & operator>>(std::istream& is, Glyph & glyph) { + if (read_img(is, glyph.img)) { + is >> glyph.views; + } + return is; +} + +std::istream & operator>>(std::istream& is, Glyphs & glyphs) { + Glyph glyph; + while (is >> glyph) { + glyphs.push_back({std::move(glyph.img), glyph.views}); + } + return is; +} + + +std::ostream & operator<<(std::ostream& os, View const & view) { + return os << view.word << ' ' << view.font << ' ' << view.info_line; +} + +std::ostream & operator<<(std::ostream& os, Views const & views) { + os << views.size() << "\n"; + for (auto & view : views) { + os << view << '\n'; + } + return os; +} + +std::ostream & operator<<(std::ostream& os, Glyph const & glyph) { + return write_img(os, glyph.img) << "\n" << glyph.views; +} + +std::ostream & operator<<(std::ostream& os, Glyphs const & glyphs) { + for (auto & glyph : glyphs) { + if (!(os << glyph)) { + break; + } + } + return os; +} + +} } diff --git a/projects/ppocr/ppocr/loader2/glyphs_loader.hpp 
b/projects/ppocr/ppocr/loader2/glyphs_loader.hpp new file mode 100644 index 0000000000..532de82be3 --- /dev/null +++ b/projects/ppocr/ppocr/loader2/glyphs_loader.hpp @@ -0,0 +1,80 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_LOADER2_GLYPHS_LOADER_HPP +#define PPOCR_SRC_LOADER2_GLYPHS_LOADER_HPP + +#include +#include +#include + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace loader2 { + +struct View { + std::string word; + std::string font; + unsigned info_line; /*ignored*/ +}; + +inline bool operator == (View const & a, View const & b) { + return a.word == b.word && a.font == b.font && a.info_line == b.info_line; +} + +inline bool operator < (View const & a, View const & b) { + if (int const cmp1 = a.word.compare(b.word)) { + return cmp1 < 0; + } + if (int const cmp1 = a.font.compare(b.font)) { + return cmp1 < 0; + } + return a.info_line < b.info_line; +} + +// strong type +struct Views : std::vector +{ using std::vector::vector; }; + +struct Glyph { + Image img; + Views views; +}; + +inline bool operator == (Glyph const & a, Glyph const & b) { + return a.img == b.img && a.views == b.views; +} +bool operator < (Glyph const & a, Glyph const & b); + +// strong type +struct Glyphs : std::vector +{ using 
std::vector::vector; }; + +std::istream & operator>>(std::istream& is, Glyph & glyph); +std::ostream & operator<<(std::ostream& os, Glyph const & glyph); +std::istream & operator>>(std::istream& is, Glyphs & glyphs); +std::ostream & operator<<(std::ostream& os, Glyphs const & glyphs); + +std::istream & operator>>(std::istream& is, View & view); +std::ostream & operator<<(std::ostream& os, View const & view); +std::istream & operator>>(std::istream& is, Views & views); +std::ostream & operator<<(std::ostream& os, Views const & views); + +} } + +#endif diff --git a/projects/ppocr/ppocr/loader2/image_io.cpp b/projects/ppocr/ppocr/loader2/image_io.cpp new file mode 100644 index 0000000000..309d962277 --- /dev/null +++ b/projects/ppocr/ppocr/loader2/image_io.cpp @@ -0,0 +1,44 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/loader2/image_io.hpp" +#include "ppocr/image/image.hpp" + +#include +#include + +namespace ppocr { namespace loader2 { + +std::istream & read_img(std::istream & is, Image & img) { + Bounds bnd; + if (is >> bnd) { + std::unique_ptr p(new Pixel[bnd.area()]); + is.rdbuf()->snextc(); + is.read(p.get(), bnd.area()); + img = Image(bnd, std::move(p)); + } + return is; +} + +std::ostream & write_img(std::ostream & os, Image const & img) { + os << img.bounds() << ' '; + os.write(img.data(), img.area()); + return os; +} + +} } diff --git a/projects/ppocr/ppocr/loader2/image_io.hpp b/projects/ppocr/ppocr/loader2/image_io.hpp new file mode 100644 index 0000000000..b614a88865 --- /dev/null +++ b/projects/ppocr/ppocr/loader2/image_io.hpp @@ -0,0 +1,35 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_LOADER2_IMAGE_IO_HPP +#define PPOCR_SRC_LOADER2_IMAGE_IO_HPP + +#include + +namespace ppocr { + +class Image; + +namespace loader2 { + +std::istream & read_img(std::istream &, Image &); +std::ostream & write_img(std::ostream &, Image const &); + +} } + +#endif diff --git a/projects/ppocr/ppocr/math/almost_equal.hpp b/projects/ppocr/ppocr/math/almost_equal.hpp new file mode 100644 index 0000000000..c8b5a9b956 --- /dev/null +++ b/projects/ppocr/ppocr/math/almost_equal.hpp @@ -0,0 +1,41 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_MATH_ALMOST_EQUAL_HPP +#define PPOCR_MATH_ALMOST_EQUAL_HPP + +#include +#include +#include + +namespace ppocr { + +template +typename std::enable_if::is_integer, bool>::type +almost_equal(T x, T y, int ulp) +{ + // the machine epsilon has to be scaled to the magnitude of the values used + // and multiplied by the desired precision in ULPs (units in the last place) + return std::abs(x-y) < std::numeric_limits::epsilon() * std::abs(x+y) * ulp + // unless the result is subnormal + || std::abs(x-y) < std::numeric_limits::min(); +} + +} + +#endif diff --git a/projects/ppocr/ppocr/ocr2/ambiguous.hpp b/projects/ppocr/ppocr/ocr2/ambiguous.hpp new file mode 100644 index 0000000000..6cc8e3123b --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/ambiguous.hpp @@ -0,0 +1,30 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_AMBIGUOUS_HPP +#define PPOCR_SRC_OCR2_AMBIGUOUS_HPP + +#include "ppocr/ocr2/cache.hpp" + +namespace ppocr { namespace ocr2 { + +using ambiguous_t = std::vector>; + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/cache.hpp b/projects/ppocr/ppocr/ocr2/cache.hpp new file mode 100644 index 0000000000..1556d7e33d --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/cache.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_CAPTURE_RDP_PPOCR_CACHE_HPP +#define PPOCR_CAPTURE_RDP_PPOCR_CACHE_HPP + +#include "ppocr/ocr2/glyphs.hpp" + +#include "ppocr/image/image.hpp" +#include "ppocr/utils/image_compare.hpp" + +#include +#include +#include + +namespace ppocr { namespace ocr2 { + +using view_ref = std::reference_wrapper; +using view_ref_list = std::vector; + +struct def_img_compare { + def_img_compare() {} + bool operator()(ppocr::Image const & a, ppocr::Image const & b) const + { return ppocr::image_compare(a, b) < 0; } +}; +using image_cache_type_t = std::map; + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/compute_image.hpp b/projects/ppocr/ppocr/ocr2/compute_image.hpp new file mode 100644 index 0000000000..75ff1550cf --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/compute_image.hpp @@ -0,0 +1,245 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_COMPUTE_IMAGE_HPP +#define PPOCR_SRC_OCR2_COMPUTE_IMAGE_HPP + +#include "ppocr/ocr2/reduce_universe.hpp" +#include "ppocr/ocr2/data_strategy_loader.hpp" +#include "ppocr/ocr2/data_indexes_ordered.hpp" +#include "ppocr/ocr2/sort_probabilities.hpp" +#include "ppocr/ocr2/insert_views.hpp" +#include "ppocr/ocr2/cache.hpp" + +#include "ppocr/image/image.hpp" +#include "ppocr/loader2/datas_loader.hpp" + +namespace ppocr { namespace ocr2 { + +#ifndef IN_IDE_PARSER +# define UNPACK(...) void(std::initializer_list{(void(__VA_ARGS__), char())...}) +#else +# define UNPACK(a) void(std::initializer_list{(void(a), char())...}) +#endif + +template class Temp, class... Strategies, class Datas> +void reduce_complexe_universe( + Temp, + Probabilities & probabilities, + Datas const & datas, + Image const & img, + Image const & img90 +) { + UNPACK(reduce_universe_and_update_probability( + probabilities, + datas.template get(), + data_strategy_loader().load(img, img90), + 0.5 + )); +} + +template +struct data_exclusive_universe : data_strategy_loader { + size_t limit; +}; + +template class Temp, class Store, class Datas> +bool has_value(Temp<>, Datas const &, unsigned, Store const &) { + return true; +} + +template class Temp, class Store, class Strategy, class... Strategies, class Datas> +bool has_value( + Temp, + Datas const & datas, + unsigned i, Store const & store +) { + if (datas.template get().get_relationship().in_dist( + datas.template get()[i], + static_cast const&>(store).x, + static_cast const&>(store).limit + )) { + return has_value(Temp(), datas, i, store); + } + return false; +} + +template class Temp, class... 
Strategies, class Datas> +void reduce_exclusive_universe( + Temp, + Probabilities & probabilities, + Datas const & datas, + Image const & img, Image const & img90, + DataIndexesByWords const & data_indexes_by_words +) { + if (probabilities.empty()) { + return ; + } + + struct + : data_exclusive_universe... + {} store; + UNPACK(( + static_cast&>(store).load(img, img90), + (static_cast&>(store).limit + = datas.template get().count_posibilities()/2) + )); + + reduce_universe_by_word(probabilities, data_indexes_by_words, [&](unsigned i) { + return has_value(Temp(), datas, i, store); + }); +} + +// -------------- +template +void initialize_universe( + Probabilities & probabilities, + Datas const & datas, + DataIndexesOrdered const & first_strategy_ordered, + unsigned value +) { + probabilities.clear(); + auto & data = datas.template get(); + auto d = (data.count_posibilities()-1)/10u; + for (auto idx : first_strategy_ordered.get_range(datas, value, d)) { + probabilities.emplace_back(idx); + } +} + +template +void initialize_probability(Probabilities & probabilities, unsigned value, Data const & data) { + for (auto & prob : probabilities) { + prob.prob = data.dist(data[prob.i], value); + } +} + +template +void update_probability(Probabilities & probabilities, unsigned value, Data const & data) { + for (auto & prob : probabilities) { + prob.prob *= data.dist(data[prob.i], value); + } +} + +template< + template class Temp, + class FirstStrategyOrdered, + class FirstStrategy, + class... Strategies, + class Datas +> +void compute_simple_universe( + Temp, + Probabilities & probabilities, + Datas const & datas, + DataIndexesOrdered const & first_strategy_ordered, + Image const & img, + Image const & img90 +) { + struct + : data_strategy_loader + , data_strategy_loader... 
+ {} store; + + static_assert(std::is_same::value, "is different"); + + initialize_universe( + probabilities, + datas, + first_strategy_ordered, + static_cast&>(store).load(img, img90) + ); + + UNPACK(reduce_universe_with_distance( + probabilities, + datas.template get(), + static_cast&>(store).load(img, img90), + (datas.template get().count_posibilities()-1)/10u + )); + + initialize_probability( + probabilities, + static_cast&>(store).x, + datas.template get() + ); + + UNPACK(update_probability( + probabilities, + static_cast&>(store).x, + datas.template get() + )); +} + +template< + class SimpleAlgos, + class ComplexAlgos, + class ExclusifAlgos, + class FirstStrategyOrdered, + class... DatasStrategies +> +view_ref_list compute_image( + SimpleAlgos, ComplexAlgos, ExclusifAlgos, + Probabilities & probabilities, + Probabilities & tmp_probabilities, + loader2::Datas const & datas, + DataIndexesOrdered const & first_strategy_ordered, + DataIndexesByWords const & data_indexes_by_words, + Glyphs const & glyphs, + std::vector const & id_views, + Image const & img, + Image const & img90, + double limit_prob_for_insert = 0.5 +) { + compute_simple_universe(SimpleAlgos{}, probabilities, datas, first_strategy_ordered, img, img90); + + view_ref_list cache_element; + + ocr2::sort_by_views(probabilities, id_views); + + if (ComplexAlgos::size || ExclusifAlgos::size) { + ocr2::unique_copy_by_views(tmp_probabilities, probabilities, id_views); + ocr2::sort_by_prop(tmp_probabilities); + + if (tmp_probabilities.empty()) { + return cache_element; + } + else if (tmp_probabilities[0].prob >= 1./* && tmp_probabilities.size() == 1*/) { + ocr2::insert_views(cache_element, tmp_probabilities, glyphs, 1.); + return cache_element; + } + else { + reduce_complexe_universe(ComplexAlgos(), probabilities, datas, img, img90); + reduce_exclusive_universe(ExclusifAlgos(), probabilities, datas, img, img90, data_indexes_by_words); + + ocr2::sort_by_views(probabilities, id_views); + } + } + + 
ocr2::unique_by_views(probabilities, id_views); + ocr2::sort_by_prop(probabilities); + if (!probabilities.empty()) { + ocr2::insert_views(cache_element, probabilities, glyphs, probabilities.front().prob * limit_prob_for_insert); + } + + return cache_element; +} + +#undef UNPACK + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/data_indexes_by_words.cpp b/projects/ppocr/ppocr/ocr2/data_indexes_by_words.cpp new file mode 100644 index 0000000000..c625a8e66b --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/data_indexes_by_words.cpp @@ -0,0 +1,47 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/ocr2/data_indexes_by_words.hpp" + +#include +#include + +ppocr::ocr2::DataIndexesByWords::DataIndexesByWords(const ppocr::ocr2::Glyphs& glyphs) +{ + std::map> map; + + unsigned n = 0; + for (auto & views : glyphs) { + auto & e = map[views.front().word]; + e.emplace_back(n++); + } + + this->indexes_by_words.resize(glyphs.size()); + for (auto & p : map) { + if (p.second.size() > 1) { + auto tmp = p.second.back(); + p.second.pop_back(); + indexes_by_words[tmp] = p.second; + for (auto & i : p.second) { + using std::swap; + swap(i, tmp); + indexes_by_words[tmp] = p.second; + } + } + } +} diff --git a/projects/ppocr/ppocr/ocr2/data_indexes_by_words.hpp b/projects/ppocr/ppocr/ocr2/data_indexes_by_words.hpp new file mode 100644 index 0000000000..30e7029318 --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/data_indexes_by_words.hpp @@ -0,0 +1,47 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_DATA_INDEXES_BY_WORDS_HPP +#define PPOCR_SRC_OCR2_DATA_INDEXES_BY_WORDS_HPP + +#include "ppocr/ocr2/glyphs.hpp" + +#include + + +namespace ppocr { namespace ocr2 { + +struct DataIndexesByWords +{ + DataIndexesByWords(DataIndexesByWords &&) = default; + DataIndexesByWords(DataIndexesByWords const &) = delete; + + DataIndexesByWords(Glyphs const & glyphs); + + std::vector const & operator[](std::size_t i) const noexcept { + return this->indexes_by_words[i]; + } + +private: + std::vector> indexes_by_words; +}; + + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/data_indexes_ordered.hpp b/projects/ppocr/ppocr/ocr2/data_indexes_ordered.hpp new file mode 100644 index 0000000000..ca633dc77f --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/data_indexes_ordered.hpp @@ -0,0 +1,64 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_DATA_INDEXES_ORDERED_HPP +#define PPOCR_SRC_OCR2_DATA_INDEXES_ORDERED_HPP + +#include "ppocr/ocr2/indexes_ordered.hpp" +#include "ppocr/loader2/datas_loader.hpp" + +namespace ppocr { namespace ocr2 { + +template +struct DataIndexesOrdered +{ + using Data = ppocr::loader2::Data; + static_assert(Data::is_contiguous::value, "is not contiguous"); + + DataIndexesOrdered(Data const & data) + : indexes_ordered(data) + {} + + template + DataIndexesOrdered(ppocr::loader2::Datas const & datas) + : indexes_ordered(datas.template get()) + {} + + ::ppocr::range_iterator + get_range(Data const & data, unsigned x, unsigned d) const { + return ppocr::ocr2::range_bounds( + data, + this->indexes_ordered, + x > d ? x-d : 0u, + x + d + ); + } + + template + ::ppocr::range_iterator + get_range(ppocr::loader2::Datas const & datas, unsigned x, unsigned d) const { + return this->get_range(datas.template get(), x, d); + } + +private: + IndexesOrdered const indexes_ordered; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/data_strategy_loader.hpp b/projects/ppocr/ppocr/ocr2/data_strategy_loader.hpp new file mode 100644 index 0000000000..98294a1ce5 --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/data_strategy_loader.hpp @@ -0,0 +1,42 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_DATA_STRATEGY_LOADER_HPP +#define PPOCR_SRC_OCR2_DATA_STRATEGY_LOADER_HPP + +#include "ppocr/image/image.hpp" + + +namespace ppocr { namespace ocr2 { + +template +struct data_strategy_loader { + using strategy_type = typename LoaderStrategy::strategy_type; + using value_type = typename strategy_type::value_type; + + value_type x; + + value_type load(ppocr::Image const & img, ppocr::Image const & img90) { + x = ppocr::loader2::load(strategy_type(), LoaderStrategy::policy, img, img90); + return x; + } +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/disambiguous_with_dict.cpp b/projects/ppocr/ppocr/ocr2/disambiguous_with_dict.cpp new file mode 100644 index 0000000000..060a437dee --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/disambiguous_with_dict.cpp @@ -0,0 +1,213 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/ocr2/disambiguous_with_dict.hpp" +#include "ppocr/ocr2/glyphs.hpp" + +#include "ppocr/spell/word_disambiguouser.hpp" +#include "ppocr/spell/dictionary.hpp" + +#include +#include + + +namespace { + using namespace ppocr::ocr2; + + inline void append(std::string & result, Glyphs const & glyphs, view_ref const & v) { + auto & s = glyphs.get_word(v); + result.append(s.begin(), s.size()); + } + + inline void append(std::string & result, Glyphs const & glyphs, view_ref_list const & l) { + if (!l.empty()) { + append(result, glyphs, l.front()); + } + } +} + +unsigned ppocr::ocr2::disambiguous_with_dict( + ambiguous_t & ambiguous, + Glyphs const & glyphs, + spell::Dictionary const & dict, + std::vector::const_iterator it_space, + std::string & result +) { + unsigned unrecognized_count = 0; + + result.clear(); + auto search_fn = [&](ppocr::ocr2::view_ref_list const & vlist) -> bool { + if (vlist.empty()) { + return true; + } + auto & s = glyphs.get_word(vlist.front()); + return (s.size() == 1 + // TODO punct + && (s.front() == '!' + + || s.front() == '#' + || s.front() == '$' + || s.front() == '%' + || s.front() == '&' + || s.front() == '\'' + || s.front() == '(' + || s.front() == ')' + || s.front() == '*' + || s.front() == '+' + || s.front() == ',' + || s.front() == '-' + || s.front() == '.' + + || s.front() == ':' + || s.front() == ';' + || s.front() == '<' + || s.front() == '=' + || s.front() == '>' + || s.front() == '?' 
+ + || s.front() == '[' + || s.front() == ']' + + || s.front() == '{' + || s.front() == '}' + ) + ); + }; + ppocr::spell::WordDisambiguouser word_disambiguouser; + + using ambiguous_iterator_base = decltype(ambiguous.begin()); + struct ambiguous_view_ref_list_iterator : ambiguous_iterator_base + { + using Glyphs = ppocr::ocr2::Glyphs; + + Glyphs const & glyphs; + + ambiguous_view_ref_list_iterator(ambiguous_iterator_base it, Glyphs const & glyphs) + : ambiguous_iterator_base(it) + , glyphs(glyphs) + {} + + struct value_type { + std::reference_wrapper ref_list; + Glyphs const & glyphs; + + struct range_string_iterator + { + using iterator = view_ref_list::const_iterator; + iterator cur; + iterator end; + Glyphs const & glyphs; + + range_string_iterator(iterator first, iterator last, Glyphs const & glyphs) + : cur(first) + , end(last) + , glyphs(glyphs) + {} + + using value_type = Glyphs::string; + + value_type const & operator*() const { + return this->glyphs.get_word(*this->cur); + } + + range_string_iterator & operator++() { + unsigned const i = this->cur->get().word; + while (++this->cur != this->end) { + if (i != this->cur->get().word) { + break; + } + } + return *this; + } + + bool operator==(range_string_iterator const & other) const { + return this->cur == other.cur; + } + + bool operator !=(range_string_iterator const & other) const { + return !(*this == other); + } + }; + + range_string_iterator begin() const { + return {ref_list.get().begin(), ref_list.get().end(), this->glyphs}; + } + range_string_iterator end() const { + return {ref_list.get().end(), ref_list.get().end(), this->glyphs}; + } + }; + + value_type operator*() const { + return {ambiguous_iterator_base::operator*(), this->glyphs}; + } + + ambiguous_view_ref_list_iterator & operator++() { + auto & base = static_cast(*this); + ++base; + return *this; + } + + ambiguous_view_ref_list_iterator operator+(std::ptrdiff_t i) const = delete; + ambiguous_view_ref_list_iterator & operator++(int) = delete; 
+ }; + + auto first = ambiguous.begin(); + auto last = ambiguous.end(); + auto middle = std::find_if_not(first, first + *it_space, search_fn); + for (; first != middle; ++first) { + append(result, glyphs, *first); + } + if (first == ambiguous.begin() + *it_space) { + ++it_space; + } + while (first != last) { + auto e = ambiguous.begin() + *it_space; + assert(e <= ambiguous.end()); + assert(first < e); + auto middle = std::find_if(first, e, search_fn); + assert(middle <= e); + using It = ambiguous_view_ref_list_iterator; + if (!word_disambiguouser(dict, It(first, glyphs), It(middle, glyphs), result)) { + for (; first != middle; ++first) { + assert(first <= ambiguous.end()); + append(result, glyphs, *first); + } + } + + for (; middle != e; ++middle) { + if (middle->get().empty()) { + result += '?'; + ++unrecognized_count; + } + else if (search_fn(*middle)) { + append(result, glyphs, middle->get().front()); + } + else { + break; + } + } + if (middle == e && e != last) { + result += ' '; + ++it_space; + } + + first = middle; + } + + return unrecognized_count; +} diff --git a/projects/ppocr/ppocr/ocr2/disambiguous_with_dict.hpp b/projects/ppocr/ppocr/ocr2/disambiguous_with_dict.hpp new file mode 100644 index 0000000000..26c725cfd0 --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/disambiguous_with_dict.hpp @@ -0,0 +1,41 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_DISAMBIGUOUS_WITH_DICT_HPP +#define PPOCR_SRC_OCR2_DISAMBIGUOUS_WITH_DICT_HPP + +#include "ppocr/ocr2/ambiguous.hpp" + +namespace ppocr { namespace spell { class Dictionary; } } + +namespace ppocr { namespace ocr2 { + +class Glyphs; + +/// \return unrecognized count +unsigned disambiguous_with_dict( + ambiguous_t & ambiguous, + Glyphs const & glyphs, + spell::Dictionary const & dict, + std::vector::const_iterator it_space, + std::string & result +); + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/filter_by_font.cpp b/projects/ppocr/ppocr/ocr2/filter_by_font.cpp new file mode 100644 index 0000000000..4c8727de9e --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/filter_by_font.cpp @@ -0,0 +1,50 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/ocr2/filter_by_font.hpp" + +#include + +unsigned ppocr::ocr2::filter_by_font(ppocr::ocr2::ambiguous_t & ambiguous) +{ + std::map fonts; + for (view_ref_list & vec : ambiguous) { + if (vec.size() == 1) { + ++fonts[vec[0].get().font]; + } + } + if (!fonts.empty()) { + using cP = decltype(fonts)::value_type const; + auto font_it = std::max_element(fonts.begin(), fonts.end(), [](cP & a, cP & b) { + return a.second < b.second; + }); + if (fonts.size() == 1 || font_it->second >= ambiguous.size()/2) { + for (view_ref_list & vec : ambiguous) { + if (vec.size() >= 2) { + auto pos = std::find_if( + vec.begin(), vec.end(), + [&](View const & view) { return view.font == font_it->first; } + ); + vec = {(pos != vec.end()) ? *pos : vec[0]}; + } + } + } + return font_it->first; + } + return ~0u; +} diff --git a/projects/ppocr/ppocr/ocr2/filter_by_font.hpp b/projects/ppocr/ppocr/ocr2/filter_by_font.hpp new file mode 100644 index 0000000000..2a8b55c1ee --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/filter_by_font.hpp @@ -0,0 +1,31 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_FILTER_BY_FONT_HPP +#define PPOCR_SRC_OCR2_FILTER_BY_FONT_HPP + +#include "ppocr/ocr2/ambiguous.hpp" + +namespace ppocr { namespace ocr2 { + +/// \return font id +unsigned filter_by_font(ambiguous_t & ambiguous); + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/filter_by_lines.cpp b/projects/ppocr/ppocr/ocr2/filter_by_lines.cpp new file mode 100644 index 0000000000..27340cb9f1 --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/filter_by_lines.cpp @@ -0,0 +1,168 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/ocr2/filter_by_lines.hpp" +#include "ppocr/ocr2/words_infos.hpp" + +#include "ppocr/filters/best_baseline.hpp" + +#include + +namespace { + using ppocr::ocr2::View; + using ppocr::ocr2::view_ref_list; + + view_ref_list::iterator next_word(view_ref_list & vec) { + auto first = vec.begin(); + auto last = vec.end(); + if (first != last) { + auto word = first->get().word; + while (++first != last && first->get().word == word) { + } + } + return first; + } + + template + Counter filter_line(view_ref_list & vec, Predicate pred, Counter count) { + auto first = next_word(vec); + auto last = vec.end(); + + if (first != last) { + auto out = vec.begin(); + auto last = vec.end(); + if (!pred(*(first-1))) { + do { + auto word = first->get().word; + first = std::find_if(first+1, last, [&](View const & view) { return view.word != word; }); + ++count; + } while (first != last && !pred(*first)); + out = first; + } + while (first != last) { + auto word = first->get().word; + auto rlast = std::find_if(first+1, last, [&](View const & view) { return view.word != word; }); + if (!pred(*first)) { + out = std::copy(std::make_move_iterator(first), std::make_move_iterator(rlast), out); + ++count; + } + first = rlast; + } + + vec.erase(out, vec.end()); + } + + return count; + } +} + +void ppocr::ocr2::filter_by_lines( + ambiguous_t & ambiguous, + WordsInfos const & words_infos, + std::vector const & boxes +) { + using iterator_base = std::vector::const_iterator; + struct iterator_baseline : iterator_base + { + using value_type = std::size_t; + + iterator_baseline(iterator_base base) + : iterator_base(base) + {} + + std::size_t operator*() const { + return iterator_base::operator*().bottom(); + } + }; + + auto const baseline = filters::best_baseline( + 
iterator_baseline(boxes.begin()), + iterator_baseline(boxes.end()) + ); + + std::map meanline_map; + + //for (rdp_ppocr::view_ref_list & vec : ambiguous) { + // for (ppocr::ppocr::loader2::View const & view : vec) { + // std::cout << view.word << "(" << view.word.size() << ") "; + // } + // std::cout << "----\n"; + //} + //std::cout << "#######\n"; + + auto it = boxes.cbegin(); + assert(boxes.size() == ambiguous.size()); + for (ppocr::ocr2::view_ref_list & vec : ambiguous) { + if (filter_line(vec, [&](View const & view) -> bool { + if (auto p = words_infos.get(view.word)) { + switch (p->lines.baseline) { + case WordLines::Upper: + return it->bottom() + 1 >= baseline; + case WordLines::Below: + return (it->bottom() < baseline ? baseline - it->bottom() : it->bottom() - baseline) > 1u; + case WordLines::Above: + return it->bottom() <= baseline + 1; + case WordLines::Upper | WordLines::Below: + return it->bottom() + 1 > baseline; + case WordLines::Below | WordLines::Above: + return it->bottom() < baseline + 1; + } + } + return false; + }, 0u) == 1u) { + if (auto p = words_infos.get(vec[0].get().word)) { + auto const & lines = p->lines; + if (lines.baseline == WordLines::Below && lines.meanline == WordLines::Below) { + ++meanline_map[it->top()]; + } + } + } + + ++it; + } + + if (!meanline_map.empty()) { + using cP = decltype(meanline_map)::value_type const; + auto meanline = std::max_element(meanline_map.begin(), meanline_map.end(), [](cP & a, cP & b) { + return a.second < b.second; + })->first; + + it = boxes.cbegin(); + for (view_ref_list & vec : ambiguous) { + struct empty_counter { empty_counter & operator++() { return *this; } }; + filter_line(vec, [&](View const & view) -> bool { + if (auto p = words_infos.get(view.word)) { + switch (p->lines.meanline) { + case WordLines::Upper: + return it->top() + 1 >= meanline; + case WordLines::Below: + return (it->top() < meanline ? 
meanline - it->top() : it->top() - meanline) > 1u; + case WordLines::Above: + return it->top() <= meanline + 1; + case WordLines::Upper | WordLines::Below: + return it->top() + 1 > meanline; + case WordLines::Below | WordLines::Above: + return it->top() < meanline + 1; + } + } + return false; + }, empty_counter{}); + ++it; + } + } +} diff --git a/projects/ppocr/ppocr/ocr2/filter_by_lines.hpp b/projects/ppocr/ppocr/ocr2/filter_by_lines.hpp new file mode 100644 index 0000000000..8e3bd14d05 --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/filter_by_lines.hpp @@ -0,0 +1,38 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_FILTER_BY_LINES_HPP +#define PPOCR_SRC_OCR2_FILTER_BY_LINES_HPP + +#include "ppocr/ocr2/ambiguous.hpp" +#include "ppocr/ocr2/words_infos.hpp" +#include "ppocr/box_char/box.hpp" + +namespace ppocr { namespace ocr2 { + +class WordsInfos; + +void filter_by_lines( + ambiguous_t & ambiguous, + WordsInfos const & words_infos, + std::vector const & boxes +); + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/glyphs.cpp b/projects/ppocr/ppocr/ocr2/glyphs.cpp new file mode 100644 index 0000000000..68fba9d8e4 --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/glyphs.cpp @@ -0,0 +1,97 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
// ---- ppocr/ocr2/glyphs.hpp ----

namespace ppocr { namespace ocr2 {

/// One reading of a glyph: a word and the font it was learnt with.
/// After loading, \c word is an offset usable with Glyphs::get_word() and
/// \c font is an id given in order of first appearance in the stream.
struct View {
    unsigned word;
    unsigned font;
};

/// Move-only list of the readings of one glyph.
struct Views : std::vector<View> {
    using std::vector<View>::vector;
    Views() = default;
    Views(Views &&) = default;
    Views(Views const &) = delete;
    Views & operator=(Views &&) = default;
    Views & operator=(Views const &) = delete;
};

/// Move-only list of glyphs plus the packed table of their words.
///
/// The table is an array of \c unsigned: each entry is the word length
/// followed by the characters, NUL-terminated and padded to a multiple of
/// \c sizeof(unsigned). Entries are stored in increasing word order.
struct Glyphs : std::vector<Views> {
    using std::vector<Views>::vector;
    Glyphs() = default;
    Glyphs(Glyphs &&) = default;
    Glyphs(Glyphs const &) = delete;
    Glyphs & operator=(Glyphs &&) = default;
    Glyphs & operator=(Glyphs const &) = delete;

    /// Read-only view over one entry of the word table.
    struct string {
        unsigned sz;
        char word[1];

        char const * data() const { return this->word; }
        char const * begin() const { return this->word; }
        char const * end() const { return this->word + this->sz; }
        unsigned size() const { return this->sz; }
        char front() const { return this->word[0]; }
    };

    string const & get_word(View const & v) const
    { return this->get_word_(v.word); }

    constexpr static unsigned const no_index = ~unsigned{};

    /// \return the offset of the entry equal to \a s, or \c no_index.
    ///
    /// The key is compared with the whole stored word. The previous code
    /// compared only the first \c s.size() characters, so "a" matched "ab"
    /// and a long key could read past a shorter stored word.
    unsigned word_index_of(std::string_view s) const {
        unsigned i = 0;
        while (i < this->buf_word_sz) {
            string const & entry = this->get_word_(i);
            int const r = s.compare(std::string_view(entry.data(), entry.size()));
            if (r == 0) {
                return i;
            }
            if (r < 0) {
                // entries are sorted: every following word is greater too
                break;
            }
            i += static_cast<unsigned>((entry.size() + sizeof(unsigned) * 2 - 1) / sizeof(unsigned));
        }
        return no_index;
    }

private:
    std::unique_ptr<unsigned[]> buf_word;
    // Number of `unsigned` used by the entries; 0 until a stream is loaded.
    unsigned buf_word_sz = 0;

    string const & get_word_(unsigned i) const
    { return *reinterpret_cast<string const *>(this->buf_word.get() + i); }

    friend std::istream & operator>>(std::istream & is, Glyphs & glyphs);
};

std::ostream & operator<<(std::ostream & os, Glyphs::string const & str);

std::istream & operator>>(std::istream & is, Glyphs & glyphs);


/// Compares two views by word only (the font is ignored).
struct EqViewWord {
    EqViewWord() {}
    bool operator()(View const & v1, View const & v2) const {
        return v1.word == v2.word;
    }
};

/// Two lists are equal when they have the same words in the same order.
struct EqViews {
    bool operator()(Views const & v1, Views const & v2) const {
        return v1.size() == v2.size() && std::equal(v1.begin(), v1.end(), v2.begin(), EqViewWord{});
    }
};

/// Strict order consistent with EqViews: shorter lists first, then by the
/// first differing word.
struct LtViews {
    bool operator()(Views const & v1, Views const & v2) const {
        if (v1.size() < v2.size()) {
            return true;
        }
        if (v1.size() > v2.size()) {
            return false;
        }
        auto const pair = std::mismatch(v1.begin(), v1.end(), v2.begin(), EqViewWord{});
        if (pair.first == v1.end()) {
            return false;
        }
        return pair.first->word < pair.second->word;
    }
};

} }

// ---- ppocr/ocr2/glyphs.cpp ----

namespace ppocr { namespace ocr2 {

/// Writes the characters of \a str (without the terminator).
std::ostream & operator<<(std::ostream & os, Glyphs::string const & str) {
    return os.write(str.data(), str.size());
}

/// Appends the glyphs read from \a is and builds the word table.
///
/// Each record is: one line that is skipped, a count \c n, then \c n lines
/// whose first two whitespace-separated tokens are the word and the font
/// (the rest of the line is ignored). Reading stops at the first record
/// whose count cannot be read.
// NOTE(review): the View::word of glyphs already present before the call are
// re-indexed too; this looks like it expects an empty Glyphs - confirm.
std::istream & operator>>(std::istream & is, Glyphs & glyphs) {
    std::map<std::string, unsigned> font_map;
    std::map<std::string, unsigned> word_map;

    std::string font;
    std::string word;
    constexpr auto limit_max = std::numeric_limits<std::streamsize>::max();

    while (is.ignore(limit_max, '\n')) {
        unsigned n;
        if (!(is >> n)) {
            break;
        }
        Views views;
        views.resize(n);

        unsigned i = 0;
        while (i < n && (is >> word >> font).ignore(limit_max, '\n')) {
            auto it_word = word_map.find(word);
            if (it_word == word_map.end()) {
                it_word = word_map.emplace(std::move(word), static_cast<unsigned>(word_map.size())).first;
            }
            views[i].word = it_word->second;

            auto it_font = font_map.find(font);
            if (it_font == font_map.end()) {
                it_font = font_map.emplace(std::move(font), static_cast<unsigned>(font_map.size())).first;
            }
            views[i].font = it_font->second;

            ++i;
        }

        glyphs.push_back(std::move(views));
    }

    constexpr std::size_t unit = sizeof(unsigned);

    std::unique_ptr<unsigned[]> buf_word;
    unsigned buf_word_sz = 0;
    {
        // temporary id (order of appearance) -> offset in the sorted table
        std::vector<unsigned> reindex(word_map.size());
        for (auto & p : word_map) {
            reindex[p.second] = buf_word_sz;
            buf_word_sz += static_cast<unsigned>((p.first.size() + unit - 1) / unit + 1);
        }
        // One extra zeroed unit: when the length of the last word is a
        // multiple of sizeof(unsigned) its terminator lands right after the
        // entries, which used to be one byte past the allocation.
        buf_word = std::unique_ptr<unsigned[]>(new unsigned[buf_word_sz + 1u]());
        for (auto & views : glyphs) {
            for (View & v : views) {
                v.word = reindex[v.word];
            }
        }
    }
    {
        auto buf = buf_word.get();
        for (auto & p : word_map) {
            *buf = static_cast<unsigned>(p.first.size());
            ++buf;
            std::copy(p.first.begin(), p.first.end(), reinterpret_cast<char*>(buf));
            reinterpret_cast<char*>(buf)[p.first.size()] = 0;
            buf += (p.first.size() + unit - 1) / unit;
        }
    }

    glyphs.buf_word = std::move(buf_word);
    glyphs.buf_word_sz = buf_word_sz;

    return is;
}

} }
you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_IMAGE_CONTEXT_HPP +#define PPOCR_SRC_OCR2_IMAGE_CONTEXT_HPP + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace ocr2 { + +struct ImageContext +{ + template + ppocr::Image const & img(Bounds const & bnd, Maker maker) { + this->reserve(bnd); + maker(this->data_.get()); + return this->img_; + } + + ppocr::Image const & img90() { + this->img90_.release().release(); + this->img90_ = Image( + {this->img_.height(), this->img_.width()}, + PtrImageData(this->data_.get() + this->img_.area()) + ); + ppocr::rotate90(this->img_, this->data_.get() + this->img_.area()); + return this->img90_; + } + + ~ImageContext() { + this->img_.release().release(); + this->img90_.release().release(); + } + +private: + PtrImageData data_; + std::size_t capacity_sz_ = 0; + + ppocr::Image img_; + ppocr::Image img90_; + + + void reserve(Bounds const & bnd) { + auto const new_sz = bnd.area() * 2; + this->img_.release().release(); + if (new_sz > this->capacity_sz_) { + this->data_.reset(new ppocr::Pixel[new_sz]); + this->capacity_sz_ = new_sz; + } + this->img_ = Image(bnd, PtrImageData(this->data_.get())); + } +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/indexes_ordered.hpp b/projects/ppocr/ppocr/ocr2/indexes_ordered.hpp new file mode 100644 index 
/// Move-only permutation of the indexes of a container, ordered by the
/// values they designate.
struct IndexesOrdered
{
    IndexesOrdered() = default;
    IndexesOrdered(IndexesOrdered &&) = default;
    IndexesOrdered(IndexesOrdered const &) = delete;

    /// Stores \c 0 .. \c data.size()-1 sorted so that \c data[index] is
    /// increasing (\c operator< on the elements of \a data).
    template<class Cont>
    IndexesOrdered(Cont const & data)
    : indexes_(new unsigned[data.size()])
    {
        unsigned * const first = this->indexes_.get();
        unsigned * const last = first + data.size();
        std::iota(first, last, 0u);
        std::sort(first, last, [&data](unsigned lhs, unsigned rhs) {
            return data[lhs] < data[rhs];
        });
    }

    using iterator = unsigned const *;
    using const_iterator = iterator;

    /// \return the first ordered index; the length is the size of the
    /// container given to the constructor and is not stored here.
    iterator begin() const { return this->indexes_.get(); }

private:
    std::unique_ptr<unsigned[]> indexes_;
};
return data[i] < x; + } + ); + return {first, std::upper_bound( + first, + indexes.begin() + data.size(), + stop, + [&](T const & x, unsigned i) { + return x < data[i]; + } + )}; +} + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/insert_views.hpp b/projects/ppocr/ppocr/ocr2/insert_views.hpp new file mode 100644 index 0000000000..387e120e3b --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/insert_views.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_INSERT_VIEWS_HPP +#define PPOCR_SRC_OCR2_INSERT_VIEWS_HPP + +#include "probabilities.hpp" +#include "ppocr/ocr2/glyphs.hpp" +#include "ppocr/ocr2/cache.hpp" + +namespace ppocr { namespace ocr2 { + +inline void insert_views( + ppocr::ocr2::view_ref_list & cache_element, + ppocr::ocr2::Probabilities const & probabilities, + ppocr::ocr2::Glyphs const & glyphs, + double limit +) { + for (auto & prob : probabilities) { + if (!(prob.prob >= limit)) { + break; + } + auto & views = glyphs[prob.i]; + cache_element.insert(cache_element.end(), views.begin(), views.end()); + } +} + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/probabilities.hpp b/projects/ppocr/ppocr/ocr2/probabilities.hpp new file mode 100644 index 0000000000..5a1547d2c3 --- /dev/null +++ 
namespace ppocr { namespace ocr2 {

/// An index paired with a probability.
struct Probability
{
    unsigned i;
    // Initialised so that an entry built from an index alone can be read,
    // copied and sorted; it used to hold an indeterminate value.
    double prob = 0.0;

    Probability(unsigned i) noexcept
    : i(i)
    {}

    Probability(unsigned i, double prob) noexcept
    : i(i)
    , prob(prob)
    {}
};

/// Fixed-capacity array of Probability.
///
/// The storage is allocated once by the constructor and never grows; the
/// capacity is not stored, so push_back(), emplace_back() and resize() rely
/// on the caller to stay within the size given at construction.
struct Probabilities
{
    using iterator = Probability *;
    using size_type = std::size_t;

    /// Allocates room for \a sz entries; the container starts empty.
    Probabilities(size_type sz)
    : data(static_cast<Probability*>(::operator new(sz * sizeof(Probability))))
    , current(data)
    {}

    /// Allocates exactly \c last-first entries and copies the range.
    template<class It>
    Probabilities(It first, It last)
    : data(static_cast<Probability*>(::operator new((last-first) * sizeof(Probability))))
    , current(data + (last-first))
    { std::copy(first, last, data); }

    Probabilities(Probabilities &&) = delete;
    Probabilities(Probabilities const &) = delete;

    /// Exchanges storage and size with \a p.
    void swap(Probabilities & p) noexcept
    {
        std::swap(p.data, data);
        std::swap(p.current, current);
    }

    ~Probabilities() {
        ::operator delete(this->data);
    }

    iterator begin() const { return data; }
    iterator end() const { return current; }
    size_type size() const { return current - data; }
    bool empty() const { return current == data; }
    void push_back(Probability const & p) { *current++ = p; }
    template<class... Args>
    void emplace_back(Args const & ... args) { *current++ = {args...}; }
    void clear() { current = data; }

    Probability const & front() const { return *data; }
    Probability const & back() const { return *(current-1); }
    Probability const & operator[](size_type i) const { return data[i]; }

    /// Sets the number of entries to \a n without touching their values.
    void resize(size_type n) {
        current = data + n;
    }

private:
    Probability * data;     // start of the allocation
    Probability * current;  // one past the last entry in use
};

inline void swap(Probabilities & a, Probabilities & b) noexcept
{ a.swap(b); }

/// Orders entries by decreasing probability.
struct GtProb {
    GtProb() {}
    bool operator()(Probability const & a, Probability const & b) const {
        return a.prob > b.prob;
    }
};

} }
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_REDUCE_UNIVERSE_HPP +#define PPOCR_SRC_OCR2_REDUCE_UNIVERSE_HPP + +#include "probabilities.hpp" +#include "ppocr/loader2/datas_loader.hpp" +#include "ppocr/ocr2/data_indexes_by_words.hpp" + +#include + +namespace ppocr { namespace ocr2 { + +template +void reduce_universe_with_distance( + ocr2::Probabilities & a, + loader2::Data const & data, + T const & value, + Dist const & d +) { + a.resize(std::remove_if(a.begin(), a.end(), [&](ocr2::Probability const & prop) { + return data.get_relationship().in_dist(value, data[prop.i], d); + }) - a.begin()); +} + +template +void reduce_universe_and_update_probability( + ocr2::Probabilities & probabilities, + loader2::Data const & data, + T const & value, + double prob_limit +) { + auto it = probabilities.begin(); + for (auto & prob : probabilities) { + auto const x = data.dist(data[prob.i], value); + if (x >= prob_limit) { + *it = {prob.i, prob.prob * x}; + ++it; + } + } + probabilities.resize(it - probabilities.begin()); +} + +template +void reduce_universe_by_word( + ocr2::Probabilities & probabilities, + ocr2::DataIndexesByWords const & data_indexes_by_words, + Predicate predicate +) { + auto it = probabilities.begin(); + for (auto & prob : probabilities) { + if (data_indexes_by_words[prob.i].empty()) { + *it = prob; + ++it; + } + else { + for (auto i : data_indexes_by_words[prob.i]) { + if (predicate(i)) { + *it = prob; + ++it; + break; + } + } + } + } + probabilities.resize(it - probabilities.begin()); +} + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/replacement.cpp b/projects/ppocr/ppocr/ocr2/replacement.cpp new file mode 100644 index 0000000000..56c729816d --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/replacement.cpp @@ -0,0 +1,42 @@ +/* +* Copyright (C) 2016 Wallix 
// ---- ppocr/ocr2/replacement.hpp ----

namespace ppocr { namespace ocr2 {

/// A text substitution: every occurrence of \c pattern becomes \c replace.
struct Replacement {
    std::string pattern;
    std::string replace;
};

struct Replacements : std::vector<Replacement>
{ using std::vector<Replacement>::vector; };

std::istream & operator >> (std::istream & is, Replacements & replacements);


/// Applies every substitution of \a replacements to \a result, in order.
///
/// After a substitution the search resumes at the position of the inserted
/// text, so a match formed by the replacement and the text that follows it is
/// substituted again (e.g. two spaces -> one space collapses a whole run).
/// Two cases used to loop forever and are now handled:
/// - an empty pattern is ignored;
/// - when the replacement contains the pattern, the search resumes after the
///   inserted text.
inline void replace_words(std::string & result, Replacements const & replacements) {
    for (Replacement const & rep : replacements) {
        if (rep.pattern.empty()) {
            continue;
        }
        // 0 keeps the re-scan described above; otherwise skip the inserted text
        std::string::size_type const skip =
            rep.replace.find(rep.pattern) != std::string::npos ? rep.replace.size() : 0;

        std::string::size_type pos = 0;
        while ((pos = result.find(rep.pattern, pos)) != std::string::npos) {
            result.replace(pos, rep.pattern.size(), rep.replace);
            pos += skip;
        }
    }
}

} }

// ---- ppocr/ocr2/replacement.cpp ----

/// Appends the substitutions read from \a is: one per line, a pattern
/// optionally followed by its replacement. A pattern directly followed by a
/// newline or by the end of the stream gets an empty replacement.
std::istream& ppocr::ocr2::operator>>(std::istream& is, Replacements& replacements) {
    using char_traits = std::char_traits<char>;
    Replacement rep;
    while (is) {
        is >> rep.pattern;
        // Start from an empty replacement for every record: a pattern
        // followed by the end of the stream used to inherit the replacement
        // of the previous record.
        rep.replace.clear();
        auto const c = is.rdbuf()->sgetc();
        if (!char_traits::eq_int_type(c, char_traits::eof())
         && !char_traits::eq(char_traits::to_char_type(c), '\n')
        ) {
            is >> rep.replace;
        }
        if (is) {
            replacements.push_back(rep);
        }
    }
    return is;
}
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_SORT_PROBABILITIES_HPP +#define PPOCR_SRC_OCR2_SORT_PROBABILITIES_HPP + +#include "probabilities.hpp" + +#include + + +namespace ppocr { namespace ocr2 { + +struct EqProbByViews { + std::vector const & id_views; + bool operator()(ppocr::ocr2::Probability const & a, ppocr::ocr2::Probability const & b) const { + return id_views[a.i] == id_views[b.i]; + } +}; + +struct LtProbByViews { + std::vector const & id_views; + bool operator()(ppocr::ocr2::Probability const & a, ppocr::ocr2::Probability const & b) const { + if (id_views[a.i] == id_views[b.i]) { + return a.prob > b.prob; + } + return id_views[a.i] < id_views[b.i]; + } +}; + +inline void unique_copy_by_views( + ppocr::ocr2::Probabilities & out, + ppocr::ocr2::Probabilities const & probabilities, + std::vector const & id_views +) { + out.resize(std::unique_copy( + probabilities.begin(), + probabilities.end(), + out.begin(), + ppocr::ocr2::EqProbByViews{id_views} + ) - out.begin()); +} + +inline void unique_by_views(ppocr::ocr2::Probabilities & probabilities, std::vector const & id_views +) { + probabilities.resize( + std::unique(probabilities.begin(), probabilities.end(), EqProbByViews{id_views}) + - probabilities.begin() + ); +} + +inline void sort_by_views(ppocr::ocr2::Probabilities & probabilities, std::vector const & id_views) { + std::sort(probabilities.begin(), probabilities.end(), LtProbByViews{id_views}); +} + +inline void sort_by_prop(ppocr::ocr2::Probabilities & probabilities) { + std::sort(probabilities.begin(), probabilities.end(), ppocr::ocr2::GtProb{}); +} + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/views_index_ordered.hpp b/projects/ppocr/ppocr/ocr2/views_index_ordered.hpp new file mode 100644 index 0000000000..61028a0803 --- /dev/null +++ 
b/projects/ppocr/ppocr/ocr2/views_index_ordered.hpp @@ -0,0 +1,56 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_VIEWS_INDEX_ORDERED_HPP +#define PPOCR_SRC_OCR2_VIEWS_INDEX_ORDERED_HPP + +#include "ppocr/ocr2/glyphs.hpp" + +#include +#include + +namespace ppocr { namespace ocr2 { + +inline std::vector get_views_indexes_ordered(Glyphs const & glyphs) { + std::vector id_views; + if (!glyphs.empty()) { + std::vector> glyph_refs(glyphs.begin(), glyphs.end()); + std::sort(glyph_refs.begin(), glyph_refs.end(), LtViews{}); + + auto first = glyph_refs.begin() + 1; + auto last = glyph_refs.end(); + // re-assign duplication + for (; first != last; ++first) { + if (EqViews{}(first->get(), (first-1)->get())) { + *first = *(first-1); + } + } + + id_views.resize(glyphs.size()); + auto it = id_views.begin(); + for (Views const & g : glyph_refs) { + *it = static_cast(&g - &glyphs[0]); + ++it; + } + } + return id_views; +} + +} } + +#endif diff --git a/projects/ppocr/ppocr/ocr2/word_lines.cpp b/projects/ppocr/ppocr/ocr2/word_lines.cpp new file mode 100644 index 0000000000..ea3d98317a --- /dev/null +++ b/projects/ppocr/ppocr/ocr2/word_lines.cpp @@ -0,0 +1,52 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can 
// ---- ppocr/ocr2/word_lines.hpp ----

namespace ppocr { namespace ocr2 {

/// Four 3-bit fields, one per typographic line, each holding a combination
/// of the flags below.
struct WordLines {
    enum {
        Unspecified = 0,
        Upper = 1,
        Below = 2,
        Above = 4
    };
    uint16_t ascentline:3;
    uint16_t capline:3;
    uint16_t meanline:3;
    uint16_t baseline:3;
};


/// A word of at most 8 bytes (not NUL-terminated) and its line flags.
struct WWordLines
{
    char str_data[8];
    uint16_t str_len;
    WordLines wlines;

    /// \return the \c str_len first bytes of \c str_data
    std::string_view sv() const
    {
        return std::string_view(str_data, str_len);
    }
};

struct WWordsLines : std::vector<WWordLines>
{ using std::vector<WWordLines>::vector; };

std::istream & operator>>(std::istream & is, WWordsLines & wwords_lines);
std::istream & operator>>(std::istream & is, WordLines & wlines);

} }

// ---- ppocr/ocr2/word_lines.cpp ----

/// Reads four unsigned integers (ascentline, capline, meanline, baseline).
/// \a wlines is only modified when the four values are read.
std::istream& ppocr::ocr2::operator>>(std::istream& is, WordLines & wlines) {
    unsigned fields[4];

    if (is >> fields[0] >> fields[1] >> fields[2] >> fields[3]) {
        wlines.ascentline = fields[0];
        wlines.capline = fields[1];
        wlines.meanline = fields[2];
        wlines.baseline = fields[3];
    }
    return is;
}

/// Appends every "word lines" record of \a is until extraction fails.
/// Records whose word is longer than 8 bytes are read and discarded.
std::istream& ppocr::ocr2::operator>>(std::istream& is, WWordsLines& wwords_lines)
{
    WWordLines entry;
    std::string word;
    for (;;) {
        if (!(is >> word >> entry.wlines)) {
            break;
        }
        if (word.size() > 8) {
            continue;
        }
        memcpy(entry.str_data, word.data(), word.size());
        entry.str_len = static_cast<uint16_t>(word.size());
        wwords_lines.push_back(entry);
    }
    return is;
}
See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_OCR2_WORDS_INFOS_HPP +#define PPOCR_SRC_OCR2_WORDS_INFOS_HPP + +#include "ppocr/ocr2/word_lines.hpp" +#include "ppocr/ocr2/glyphs.hpp" + +#include + + +namespace ppocr { namespace ocr2 { + +struct WordInfos { + unsigned word; + WordLines lines; +}; + +struct WordsInfos { + WordsInfos() = default; + WordsInfos(WordsInfos &&) = default; + WordsInfos(WordsInfos const &) = delete; + + WordsInfos(Glyphs const & glyphs, WWordsLines const & wwords_lines) + { + for (auto & p : wwords_lines) { + auto idx = glyphs.word_index_of(p.sv()); + if (idx != Glyphs::no_index) { + this->words_infos.push_back({idx, p.wlines}); + } + } + + std::sort( + this->words_infos.begin(), this->words_infos.end(), + [](WordInfos const & wi1, WordInfos const & wi2) { + return wi1.word < wi2.word; + } + ); + } + + WordInfos const * get(unsigned word) const { + auto it = std::lower_bound( + this->words_infos.begin(), this->words_infos.end(), word, + [](WordInfos const & info, unsigned w) { + return info.word < w; + } + ); + return it != this->words_infos.end() && it->word == word ? &*it : nullptr; + } + +private: + std::vector words_infos; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/spell/dictionary.cpp b/projects/ppocr/ppocr/spell/dictionary.cpp new file mode 100644 index 0000000000..dc3cfd74f5 --- /dev/null +++ b/projects/ppocr/ppocr/spell/dictionary.cpp @@ -0,0 +1,102 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr//spell/dictionary.hpp" +#include "ppocr/utils/utf.hpp" + +#include +#include + + +namespace ppocr { namespace spell { + +Dictionary::Dictionary(container::trie const & trie) +: trie_(trie) +{} + +Dictionary::Dictionary(container::flat_trie trie) +: trie_(std::move(trie)) +{} + +Dictionary::Dictionary(std::vector const & words) +{ + std::vector> uwords; + for (auto & old_word : words) { + std::vector new_word; + utf::UTF8Iterator it(old_word.data()); + for (uint32_t c; (c = *it); ++it) { + new_word.push_back(c); + } + uwords.push_back(std::move(new_word)); + } + + std::sort(uwords.begin(), uwords.end()); + uwords.erase(std::unique(uwords.begin(), uwords.end()), uwords.end()); + this->trie_ = container::trie(uwords.begin(), uwords.end()); +} + +namespace { + struct IODictionary : Dictionary::Manipulator + { using Dictionary::Manipulator::trie; }; +} + +std::ostream& operator<<(std::ostream& os, Dictionary const & dict) +{ + for (auto node : IODictionary().trie(dict).all()) { + os + << node.get() << " " + << node.relative_pos() << " " + << node.size() << " " + << node.is_terminal() + << "\n"; + } + return os; +} + +std::istream& operator>>(std::istream& is, Dictionary & dict) +{ + struct Saver { + std::ios & io_; + std::ios::fmtflags flags_; + + Saver(std::ios & io) + : io_(io), flags_(io.flags()) + { io.unsetf(std::ios::skipws); } + + ~Saver() + { this->io_.flags(flags_); } + } saver{is}; + + using trie_type = container::flat_trie; + std::vector nodes; + trie_type::value_type c; + 
unsigned pos; + unsigned sz; + bool terminal; + char e1, e2, e3, e4; + while (is >> c >> e1 >> pos >> e2 >> sz >> e3 >> terminal >> e4) { + if (e1 != ' ' || e2 != ' ' || e3 != ' ' || e4 != '\n') { + throw std::runtime_error("bad format"); + } + nodes.emplace_back(c, pos, sz, terminal); + } + dict = Dictionary(std::move(nodes)); + return is; +} + +} } diff --git a/projects/ppocr/ppocr/spell/dictionary.hpp b/projects/ppocr/ppocr/spell/dictionary.hpp new file mode 100644 index 0000000000..2cb64ebae9 --- /dev/null +++ b/projects/ppocr/ppocr/spell/dictionary.hpp @@ -0,0 +1,54 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_SPELL_DICTIONARY_HPP +#define PPOCR_SRC_SPELL_DICTIONARY_HPP + +#include "ppocr/container/trie.hpp" + +#include + +namespace ppocr { namespace spell { + +struct Dictionary +{ + class Manipulator { + protected: + using trie_type = container::flat_trie; + + trie_type const & trie(Dictionary const & dict) const + { return dict.trie_; } + }; + + Dictionary(std::vector const & words); + Dictionary(container::flat_trie trie); + Dictionary(container::trie const & trie); + Dictionary() = default; + + bool empty() const { return this->trie_.all().empty(); } + +private: + container::flat_trie trie_; +}; + +std::istream & operator >> (std::istream & is, Dictionary & dict); +std::ostream & operator << (std::ostream & os, Dictionary const & dict); + +} } + +#endif diff --git a/projects/ppocr/ppocr/spell/word_disambiguouser.hpp b/projects/ppocr/ppocr/spell/word_disambiguouser.hpp new file mode 100644 index 0000000000..28508acae9 --- /dev/null +++ b/projects/ppocr/ppocr/spell/word_disambiguouser.hpp @@ -0,0 +1,100 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_SPELL_WORD_DISAMBIGUOUSER_HPP +#define PPOCR_SRC_SPELL_WORD_DISAMBIGUOUSER_HPP + +#include "ppocr/spell/dictionary.hpp" +#include "ppocr/utils/utf.hpp" + +#include + + +namespace ppocr { namespace spell { + +struct WordDisambiguouser : Dictionary::Manipulator +{ + template + bool operator()(Dictionary const & dict, FwRngStrIt first, FwRngStrIt last, std::string & output) + { + if (first == last || this->trie(dict).all().empty()) { + return false; + } + auto sz = output.size(); + if (disambiguous_impl(this->trie(dict).childrens(), first, last, output)) { + std::swap_ranges(output.begin() + sz, output.begin() + sz + (output.size() - sz) / 2, output.rbegin()); + return true; + } + return false; + } + +private: + template + bool disambiguous_impl(trie_type::range rng, FwRngStrIt first, FwRngStrIt last, std::string & output) { + for (auto & s : *first) { + if (disambiguous_utf_char(rng, first, last, utf::UTF8Iterator(s.data()), output)) { + return true; + } + } + return false; + } + + template + bool disambiguous_utf_char(trie_type::range rng, FwRngStrIt first, FwRngStrIt last, utf::UTF8Iterator it, std::string & output) { + auto const c = *it; + auto pos = rng.lower_bound(c); + if (pos != rng.end() && pos->get() == c) { + auto next_first = first; + ++next_first; + if (next_first == last) { + if (pos->is_terminal()) { + set_c(output, c); + return true; + } + } + else if (!pos->empty()) { + if (*++it) { + if (disambiguous_utf_char(pos->childrens(), first, last, it, output)) { + set_c(output, c); + return true; + } + } + else if (disambiguous_impl(pos->childrens(), next_first, last, output)) { + set_c(output, c); + return true; + } + } + } + return false; + } + + void set_c(std::string & s, uint32_t code) { + char c[] = { + char((code 
& 0x000000FF)), + char((code & 0x0000FF00) >> 8), + char((code & 0x00FF0000) >> 16), + char((code & 0xFF000000) >> 24), + '\0' + }; + s += c; + } +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/agravity.cpp b/projects/ppocr/ppocr/strategies/agravity.cpp new file mode 100644 index 0000000000..50f3e30e9e --- /dev/null +++ b/projects/ppocr/ppocr/strategies/agravity.cpp @@ -0,0 +1,80 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/agravity.hpp" +#include "ppocr/image/image.hpp" + +#include "ppocr/math/almost_equal.hpp" +#include "ppocr/strategies/utils/relationship.hpp" +#include "ppocr/strategies/utils/horizontal_gravity.hpp" + +#include +#include +#include + +#include +#include + + +namespace ppocr { namespace strategies { + +agravity::agravity(const Image& img, const Image& img90) +{ + utils::TopBottom const g1 = utils::horizontal_gravity(img); + utils::TopBottom const g2 = utils::horizontal_gravity(img90); + + using Signed = long; + + auto const h1 = Signed(g1.top) - Signed(g1.bottom); + auto const h2 = Signed(g2.top) - Signed(g2.bottom); + + if (h1 || h2) { + this->a = std::asin(double(h1) / std::sqrt(h1*h1+h2*h2)); + } +} + +bool agravity::operator==(const agravity& other) const +{ return 
almost_equal(this->a, other.a, 2); } + +bool agravity::operator<(agravity const & other) const +{ return a < other.a && !(*this == other); } + +unsigned agravity::relationship(const agravity& other) const +{ + // disable -Wfloat-equal + std::equal_to eq; + if (eq(angle(), null_angle()) || eq(other.angle(), null_angle())) { + return eq(other.angle(), angle()) ? 100 : 0; + } + return utils::compute_relationship(angle(), other.angle(), M_PI); +} + +std::istream& operator>>(std::istream& is, agravity& ag) +{ return is >> ag.a; } + +std::ostream& operator<<(std::ostream& os, const agravity& ag) +{ + auto const flags = os.setf(std::ios_base::fixed | std::ios_base::scientific, std::ios_base::floatfield); + auto const precision = os.precision(std::numeric_limits::max_exponent10); + os << ag.angle(); + os.precision(precision); + os.setf(flags, std::ios_base::floatfield); + return os; +} + +} } diff --git a/projects/ppocr/ppocr/strategies/agravity.hpp b/projects/ppocr/ppocr/strategies/agravity.hpp new file mode 100644 index 0000000000..04dfafafbf --- /dev/null +++ b/projects/ppocr/ppocr/strategies/agravity.hpp @@ -0,0 +1,58 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_AGRAVITY_HPP +#define PPOCR_STRATEGIES_AGRAVITY_HPP + +#include + +namespace ppocr { + +class Image; + +namespace strategies +{ + struct agravity + { + static constexpr double null_angle() { return 100000.; } + + agravity() = default; + agravity(double a) : a(a) {} + + agravity(const Image & img, const Image & img90); + + bool operator<(agravity const & other) const; + + bool operator==(agravity const & other) const; + + unsigned relationship(const agravity& other) const; + + double angle() const noexcept { return a; } + + friend std::istream & operator>>(std::istream &, agravity &); + + private: + double a = null_angle(); + }; + + std::ostream & operator<<(std::ostream &, agravity const &); +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/alternation.cpp b/projects/ppocr/ppocr/strategies/alternation.cpp new file mode 100644 index 0000000000..6e4707782b --- /dev/null +++ b/projects/ppocr/ppocr/strategies/alternation.cpp @@ -0,0 +1,98 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/alternation.hpp" +#include "ppocr/image/image.hpp" + +#include +#include +#include + +#ifdef DEBUG_ALTERNATION +#include +#define MAKE_SEQUENCE_ALTERNATION(name, img, x, y, B) \ + (void(std::cout << name ":[" << img.bounds() << "][" << Index x, y << "] + [" << B << "]\n"), \ + make_alternations(img, x, y, B)) +#else +#define MAKE_SEQUENCE_ALTERNATION(name, img, x, y, B) make_alternations(img, x, y, B) +#endif + +namespace ppocr { namespace strategies { + +namespace { + template + alternations::alternations_type + make_alternations(const Image & img, Index const & idx, T const & bounds) + { + auto range = hrange(img, idx, bounds); + auto it = range.begin(); + auto last = range.end(); + + alternations::alternations_type alternations{*it, 1}; + while (rng::next_alternation(it, last)) { + ++alternations.count; + } + + return alternations; + } +} + +alternations::value_type alternations::load(const Image& img, const Image& img90) const +{ + alternations::value_type seq_alternations; + auto it = seq_alternations.begin(); + + { + Bounds const & bnd = img.bounds(); + + if (bnd.h() < 2) { + *it++ = alternations_type{0, 0}; + } else { + *it++ = MAKE_SEQUENCE_ALTERNATION("Hl1", img, {0, (bnd.h()-2)/3}, bnd.w()); + } + *it++ = MAKE_SEQUENCE_ALTERNATION("Hl2", img, {0, (bnd.h()*2-1)/3}, bnd.w()); + Bounds const bnd_mask(bnd.w(), bnd.h()/3); + *it++ = MAKE_SEQUENCE_ALTERNATION("Hm1", img, {0, 0}, bnd_mask); + *it++ = MAKE_SEQUENCE_ALTERNATION("Hm2", img, {0, bnd.h()-bnd_mask.h()}, bnd_mask); + } + + { + Bounds const & bnd = img90.bounds(); + + *it++ = MAKE_SEQUENCE_ALTERNATION("Vl1", img90, {0, (bnd.h()-1)/2}, bnd.w()); + Bounds const bnd_mask(bnd.w(), bnd.h()/3); + *it++ = MAKE_SEQUENCE_ALTERNATION("Vm1", img90, {0, 
bnd.h()-bnd_mask.h()}, bnd_mask); + *it++ = MAKE_SEQUENCE_ALTERNATION("Vm2", img90, {0, 0}, bnd_mask); + } + + assert(it == seq_alternations.end()); + return seq_alternations; +} + +unsigned alternations::best_difference() const +{ return 10; } + +std::ostream & operator<<(std::ostream & os, alternations::alternations_type const & x) { + return os << x.start_contains_letter << ' ' << x.count; +} + +std::istream & operator>>(std::istream & is, alternations::alternations_type & x) { + return is >> x.start_contains_letter >> x.count; +} + +} } diff --git a/projects/ppocr/ppocr/strategies/alternation.hpp b/projects/ppocr/ppocr/strategies/alternation.hpp new file mode 100644 index 0000000000..264b30b857 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/alternation.hpp @@ -0,0 +1,119 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_ALTERNATION_HPP +#define PPOCR_STRATEGIES_ALTERNATION_HPP + +#include +#include + +#include "ppocr/strategies/relationship/array_compare.hpp" + +namespace ppocr { + +class Image; + +namespace strategies +{ + /** + * :::::::::::: + * :xx : + * :xx : + * :xx : + * :xx : + * Hl1, Hl2, :xx : + * Hm1, Hm2, :xxxxxxx : + * Vl1, :xxxxxxxx : + * Vm1, Vm2 :xx xxx: + * :xx xx: + * :xx xxx: + * :xxxxxxxxx : + * :xxxxxxx : + * :::::::::::: + * + * :::::::::::: + * :xx : ---------- + * :xx : :xx : + * :xx : :xx : Hm1 = 1, 0 + * ---------- :xx : h = (img.h/3) + * |xx | Hl1 = 1, 0 ; y = ((img.h-2)/3) :xx : + * ---------- ---------- + * :xx : :xx : + * :xxxxxxx : :xxxxxxx : + * :xxxxxxxx : :xxxxxxxx : + * ---------- :xx xxx: + * |xx xxx| Hl2 = 1, 0, 1 ; y = ((img.h*2-1)/3) ---------- + * ---------- :xx xx: + * :xx xx: :xx xxx: Hm2 = 1 + * :xx xxx: :xxxxxxxxx : h = (img.h/3) + * :xxxxxxxxx : :xxxxxxx : + * :xxxxxxx : ---------- + * :::::::::::: + * + * + * Vl1 = 0, 1, 0, 1 ; x = (img.w-1)/2 Vm1 = 1 ; w = img.w/3 ; Vm2 = 0, 1, 0 + * ::::::_::::::: :___::::::___: + * :xx | | : |xx | | | + * :xx | | : |xx | | | + * :xx | | : |xx | | | + * :xx | | : |xx | | | + * :xx | | : |xx | | | + * :xxxx|x|xx : |xxx|xxxx| | + * :xxxx|x|xxx : |xxx|xxxx|x | + * :xx | | xxx: |xx | |xxx| + * :xx | | xx: |xx | | xx| + * :xx | | xxx: |xx | |xxx| + * :xxxx|x|xxxx : |xxx|xxxx|xx | + * :xxxx|x|xx : |xxx|xxxx| | + * ::::::-::::::: :---::::::---: + */ +struct alternations +{ + struct alternations_type { + bool start_contains_letter; + std::size_t count; + + bool operator==(alternations_type const & other) const noexcept + { return start_contains_letter == other.start_contains_letter && count == other.count; } + + bool operator<(alternations_type const & other) 
const noexcept + { + return count < other.count + || (count == other.count && start_contains_letter < other.start_contains_letter); + } + }; + + using relationship_type = array_compare_relationship; + using value_type = relationship_type::value_type; + + static constexpr bool one_axis = false; + + value_type load(Image const & img, Image const & /*img90*/) const; + + constexpr relationship_type relationship() const { return {}; } + unsigned best_difference() const; +}; + +std::ostream & operator<<(std::ostream &, alternations::alternations_type const &); +std::istream & operator>>(std::istream &, alternations::alternations_type &); + +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/compass.cpp b/projects/ppocr/ppocr/strategies/compass.cpp new file mode 100644 index 0000000000..ba0f2eb0a6 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/compass.cpp @@ -0,0 +1,53 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/compass.hpp" +#include "ppocr/image/image.hpp" + +#include "ppocr/strategies/utils/relationship.hpp" +#include "ppocr/strategies/utils/horizontal_direction.hpp" +#include "ppocr/strategies/utils/cardinal_direction_io.hpp" + +#include +#include + +namespace ppocr { namespace strategies { + +static int horizontal_compass(const Image& img) +{ + utils::TopBottom d = utils::horizontal_direction(img); + + return (d.top > d.bottom) ? 1 + : (d.top < d.bottom) ? 3 + : 2; +} + +compass::compass(const Image& img, const Image& img90) +: d(static_cast(horizontal_compass(img) | horizontal_compass(img90) << 2)) +{} + +unsigned int compass::relationship(const compass& other) const +{ return utils::cardinal_relationship(d, other.d); } + +std::istream& operator>>(std::istream& is, compass& d) +{ return is >> d.d; } + +std::ostream& operator<<(std::ostream& os, const compass& d) +{ return os << d.direction(); } + +} } diff --git a/projects/ppocr/ppocr/strategies/compass.hpp b/projects/ppocr/ppocr/strategies/compass.hpp new file mode 100644 index 0000000000..6e55cbac1e --- /dev/null +++ b/projects/ppocr/ppocr/strategies/compass.hpp @@ -0,0 +1,61 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_COMPASS_HPP +#define PPOCR_STRATEGIES_COMPASS_HPP + +#include + +#include "ppocr/strategies/utils/cardinal_direction.hpp" + +namespace ppocr { + +class Image; + +namespace strategies +{ + struct compass + { + using cardinal_direction = utils::CardinalDirection; + + compass() = default; + + compass(const Image & img, const Image & img90); + + bool operator<(compass const & other) const + { return d < other.d; } + + bool operator==(compass const & other) const + { return d == other.d; } + + unsigned relationship(const compass& other) const; + + cardinal_direction direction() const noexcept { return d; } + + friend std::istream & operator>>(std::istream &, compass &); + + private: + cardinal_direction d = cardinal_direction::NONE; + }; + + std::ostream & operator<<(std::ostream &, compass const &); +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/dcompass.cpp b/projects/ppocr/ppocr/strategies/dcompass.cpp new file mode 100644 index 0000000000..dbce58fd62 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dcompass.cpp @@ -0,0 +1,77 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/dcompass.hpp" +#include "ppocr/image/image.hpp" + +#include "ppocr/strategies/utils/relationship.hpp" +#include "ppocr/strategies/utils/cardinal_direction_io.hpp" + +#include +#include + + +namespace ppocr { namespace strategies { + +static unsigned count_dcompass(Bounds const & bnd, Pixel const * p, Pixel const * ep, bool is_top) +{ + unsigned d = 0; + size_t ih = 0; + size_t const wdiv2 = bnd.w()/2; + for (; p != ep; p += bnd.w(), ++ih) { + size_t x = wdiv2 - bnd.w() / (!is_top ? bnd.h() - ih : 1 + ih) / 2; + auto leftp = p + x; + auto rightp = p + bnd.w() - x; + for (; leftp != rightp; ++leftp) { + if (is_pix_letter(*leftp)) { + ++d; + } + } + } + return d; +} + +static int horizontal_dcompass(const Image& img) +{ + Bounds const bnd(img.width(), img.height() / 2); + auto p = img.data(); + auto ep = img.data({0, bnd.h()}); + long const top = count_dcompass(bnd, p, ep, true); + p = ep; + if (img.height() & 1) { + p += img.width(); + } + long const bottom = count_dcompass(bnd, p, img.data_end(), false); + + return top < bottom ? 1 : top > bottom ? 
3 : 2; +} + +dcompass::dcompass(const Image& img, const Image& img90) +: d(static_cast(horizontal_dcompass(img) | horizontal_dcompass(img90) << 2)) +{} + +unsigned dcompass::relationship(const dcompass& other) const +{ return utils::cardinal_relationship(d, other.d); } + +std::istream& operator>>(std::istream& is, dcompass& d) +{ return is >> d.d; } + +std::ostream& operator<<(std::ostream& os, const dcompass& d) +{ return os << d.direction(); } + +} } diff --git a/projects/ppocr/ppocr/strategies/dcompass.hpp b/projects/ppocr/ppocr/strategies/dcompass.hpp new file mode 100644 index 0000000000..0107c803f0 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dcompass.hpp @@ -0,0 +1,63 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_DCOMPASS_HPP +#define PPOCR_STRATEGIES_DCOMPASS_HPP + +#include + +#include "ppocr/strategies/utils/cardinal_direction.hpp" + +namespace ppocr { + +class Image; + +namespace strategies +{ + struct dcompass + { + using cardinal_direction = utils::CardinalDirection; + + dcompass() = default; + + dcompass(cardinal_direction d) : d(d) {} + + dcompass(const Image & img, const Image & img90); + + bool operator<(dcompass const & other) const + { return d < other.d; } + + bool operator==(dcompass const & other) const + { return d == other.d; } + + unsigned relationship(const dcompass& other) const; + + cardinal_direction direction() const noexcept { return d; } + + friend std::istream & operator>>(std::istream &, dcompass &); + + private: + cardinal_direction d = cardinal_direction::NONE; + }; + + std::ostream & operator<<(std::ostream &, dcompass const &); +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/density.cpp b/projects/ppocr/ppocr/strategies/density.cpp new file mode 100644 index 0000000000..e01390b430 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/density.cpp @@ -0,0 +1,44 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/density.hpp" + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace strategies { + +density::value_type density::load(const Image& img, const Image& /*img90*/) const +{ + size_t count = 0; + auto first = img.data(); + auto last = img.data_end(); + for (; first != last; ++first) { + if (is_pix_letter(*first)) { + ++count; + } + } + return count * 100 / img.area(); +} + +density::relationship_type density::relationship() const +{ return {100}; } + +unsigned density::best_difference() const +{ return 5; } + +} } diff --git a/projects/ppocr/ppocr/strategies/density.hpp b/projects/ppocr/ppocr/strategies/density.hpp new file mode 100644 index 0000000000..d5d0887542 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/density.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_DENSITY_HPP +#define PPOCR_SRC_STRATEGIES_DENSITY_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct density +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = false; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/direction.cpp b/projects/ppocr/ppocr/strategies/direction.cpp new file mode 100644 index 0000000000..05bd35b45c --- /dev/null +++ b/projects/ppocr/ppocr/strategies/direction.cpp @@ -0,0 +1,51 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/direction.hpp" +#include "ppocr/image/image.hpp" + +#include "ppocr/strategies/utils/relationship.hpp" +#include "ppocr/strategies/utils/horizontal_zone.hpp" +#include "ppocr/strategies/utils/horizontal_direction.hpp" +#include "ppocr/strategies/utils/cardinal_direction_io.hpp" + +#include +#include + +#include + + +namespace ppocr { namespace strategies { + +direction::direction(const Image& img, const Image& img90) +: d(static_cast( + utils::horizontal_zone_1_2_3_4_5(img, utils::horizontal_direction(img)) + | (utils::horizontal_zone_1_2_3_4_5(img90, utils::horizontal_direction(img90)) << 3) +)) +{} + +unsigned direction::relationship(const direction& other) const +{ return utils::cardinal_relationship(d, other.d); } + +std::istream& operator>>(std::istream& is, direction& d) +{ return is >> d.d; } + +std::ostream& operator<<(std::ostream& os, const direction& d) +{ return os << d.id(); } + +} } diff --git a/projects/ppocr/ppocr/strategies/direction.hpp b/projects/ppocr/ppocr/strategies/direction.hpp new file mode 100644 index 0000000000..888e96e1d2 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/direction.hpp @@ -0,0 +1,63 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_DIRECTION_HPP +#define PPOCR_STRATEGIES_DIRECTION_HPP + +#include + +#include "ppocr/strategies/utils/cardinal_direction.hpp" + +namespace ppocr { + +class Image; + +namespace strategies +{ + struct direction + { + using cardinal_direction = utils::CardinalDirection2; + + direction() = default; + + direction(const Image & img, const Image & img90); + + direction(cardinal_direction direction) : d(direction) {} + + bool operator<(direction const & other) const + { return d < other.d; } + + bool operator==(direction const & other) const + { return d == other.d; } + + unsigned relationship(const direction& other) const; + + cardinal_direction id() const noexcept { return d; } + + friend std::istream & operator>>(std::istream &, direction &); + + private: + cardinal_direction d = cardinal_direction::NONE; + }; + + std::ostream & operator<<(std::ostream &, direction const &); +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/dvdirection.cpp b/projects/ppocr/ppocr/strategies/dvdirection.cpp new file mode 100644 index 0000000000..5d6381c6be --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvdirection.cpp @@ -0,0 +1,40 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/dvdirection.hpp" + +#include "ppocr/strategies/utils/diagonal_vertical_direction.hpp" + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace strategies { + +dvdirection::value_type dvdirection::load(Image const & img, Image const & /*img90*/) const +{ + auto const top_bottom = utils::diagonal_vertical_direction(img); + auto const sum = top_bottom.top + top_bottom.bottom; + return sum ? (sum + top_bottom.top - top_bottom.bottom) * 100 / sum : 100; +} + +dvdirection::relationship_type dvdirection::relationship() const +{ return {200}; } + +unsigned dvdirection::best_difference() const +{ return 10; } + +} } diff --git a/projects/ppocr/ppocr/strategies/dvdirection.hpp b/projects/ppocr/ppocr/strategies/dvdirection.hpp new file mode 100644 index 0000000000..0ecde7320f --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvdirection.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_DVDIRECTION_HPP +#define PPOCR_SRC_STRATEGIES_DVDIRECTION_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct dvdirection +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/dvdirection2.cpp b/projects/ppocr/ppocr/strategies/dvdirection2.cpp new file mode 100644 index 0000000000..081cc78b8a --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvdirection2.cpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/dvdirection2.hpp" + +#include "ppocr/strategies/utils/diagonal_vertical_direction.hpp" + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace strategies { + +dvdirection2::value_type dvdirection2::load(const Image& img, const Image&) const +{ + auto const area = utils::diagonal_vertical_direction_area(img); + if (!area) { + return 50; + } + auto const top_bottom = utils::diagonal_vertical_direction(img); + auto const sum = top_bottom.top + top_bottom.bottom; + auto const ret = (sum + top_bottom.top - top_bottom.bottom) * 100 / area; + return ret; +} + +dvdirection2::relationship_type dvdirection2::relationship() const +{ return {100}; } + +unsigned dvdirection2::best_difference() const +{ return 5; } + +} } diff --git a/projects/ppocr/ppocr/strategies/dvdirection2.hpp b/projects/ppocr/ppocr/strategies/dvdirection2.hpp new file mode 100644 index 0000000000..cb9cd93e88 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvdirection2.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_DVDIRECTION2_HPP +#define PPOCR_SRC_STRATEGIES_DVDIRECTION2_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct dvdirection2 +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/dvgravity.cpp b/projects/ppocr/ppocr/strategies/dvgravity.cpp new file mode 100644 index 0000000000..50b2086985 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvgravity.cpp @@ -0,0 +1,40 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/dvgravity.hpp" + +#include "ppocr/strategies/utils/diagonal_vertical_gravity.hpp" + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace strategies { + +dvgravity::value_type dvgravity::load(Image const & img, Image const & /*img90*/) const +{ + auto const top_bottom = utils::diagonal_vertical_gravity(img); + auto const sum = top_bottom.top + top_bottom.bottom; + return sum ? (sum + top_bottom.top - top_bottom.bottom) * 100 / sum : 100; +} + +dvgravity::relationship_type dvgravity::relationship() const +{ return 200; } + +unsigned dvgravity::best_difference() const +{ return 10; } + +} } diff --git a/projects/ppocr/ppocr/strategies/dvgravity.hpp b/projects/ppocr/ppocr/strategies/dvgravity.hpp new file mode 100644 index 0000000000..93b2efee65 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvgravity.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_DVGRAVITY_HPP +#define PPOCR_SRC_STRATEGIES_DVGRAVITY_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct dvgravity +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/dvgravity2.cpp b/projects/ppocr/ppocr/strategies/dvgravity2.cpp new file mode 100644 index 0000000000..9ac3f36abc --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvgravity2.cpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/dvgravity2.hpp" + +#include "ppocr/strategies/utils/diagonal_vertical_gravity.hpp" + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace strategies { + +dvgravity2::value_type dvgravity2::load(Image const & img, Image const & /*img90*/) const +{ + auto const area = utils::diagonal_vertical_gravity_area(img); + if (!area) { + return 50; + } + auto const top_bottom = utils::diagonal_vertical_gravity(img); + auto const sum = top_bottom.top + top_bottom.bottom; + auto const ret = (sum + top_bottom.top - top_bottom.bottom) * 100 / area; + return ret; +} + +dvgravity2::relationship_type dvgravity2::relationship() const +{ return {100}; } + +unsigned dvgravity2::best_difference() const +{ return 5; } + +} } diff --git a/projects/ppocr/ppocr/strategies/dvgravity2.hpp b/projects/ppocr/ppocr/strategies/dvgravity2.hpp new file mode 100644 index 0000000000..3a2eb44a60 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dvgravity2.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_DVGRAVITY2_HPP +#define PPOCR_SRC_STRATEGIES_DVGRAVITY2_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct dvgravity2 +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/dzdensity.cpp b/projects/ppocr/ppocr/strategies/dzdensity.cpp new file mode 100644 index 0000000000..ba8b2aa4f1 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dzdensity.cpp @@ -0,0 +1,75 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/dzdensity.hpp" + +#include "ppocr/image/image.hpp" + +#include + +namespace ppocr { namespace strategies { + +namespace { + template + void dzdensity_load_impl(Image const & img, Fn f) { + int const area = int(img.area()); + int const h3 = int(img.height() * 3); + int const w3 = int(img.width() * 3); + int yp = 0; + for (size_t y = 0; y < img.height(); ++y, yp += w3) { + //double const d = double(img.width())/3.; + //double const xs = std::ceil(double(y+1)*w/h-d); + //double const xe = std::floor(double(y )*w/h+d); + int const xs_ = (w3 + yp - area + h3-1) / h3; + int const xe_ = ( yp + area ) / h3; + + int const xs = std::max(0, xs_); + int const xe = std::min(int(img.width()), xe_); + + f(xs, xe); + } + } +} + +dzdensity::value_type dzdensity::load(Image const & img, Image const & /*img90*/) const +{ + if (img.width() < 3 || img.height() < 3) { + return 0; + } + + int res = 0; + int area = 0; + auto data = img.data(); + dzdensity_load_impl(img, [&](int xs, int xe) { + xs = std::max(0, xs); + xe = std::min(int(img.width()), xe); + res += std::count_if(data+xs, data+xe, is_pix_letter_fn()); + area += xe-xs; + data += img.width(); + }); + + return dzdensity::value_type(res * 100 / area); +} + +dzdensity::relationship_type dzdensity::relationship() const +{ return {100}; } + +unsigned dzdensity::best_difference() const +{ return 5; } + +} } diff --git a/projects/ppocr/ppocr/strategies/dzdensity.hpp b/projects/ppocr/ppocr/strategies/dzdensity.hpp new file mode 100644 index 0000000000..bbc8ae2122 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/dzdensity.hpp @@ -0,0 +1,57 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser 
General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_DZDENSITY_HPP +#define PPOCR_SRC_STRATEGIES_DZDENSITY_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +/* + ____/__ + | oo/#.| + |o /..#|/ + |o/..../ + |/####/| + /..../o| + /|#../ o| + |.#/oo | + -/---- +*/ + +struct dzdensity +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/gravity.cpp b/projects/ppocr/ppocr/strategies/gravity.cpp new file mode 100644 index 0000000000..e021e99d0b --- /dev/null +++ b/projects/ppocr/ppocr/strategies/gravity.cpp @@ -0,0 +1,50 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/gravity.hpp" +#include "ppocr/image/image.hpp" + +#include "ppocr/strategies/utils/relationship.hpp" +#include "ppocr/strategies/utils/horizontal_gravity.hpp" +#include "ppocr/strategies/utils/horizontal_zone.hpp" +#include "ppocr/strategies/utils/cardinal_direction_io.hpp" + +#include +#include + +#include + +namespace ppocr { namespace strategies { + +gravity::gravity(const Image& img, const Image& img90) +: d(static_cast( + utils::horizontal_zone_1_2_3_4_5(img, utils::horizontal_gravity(img)) + | (utils::horizontal_zone_1_2_3_4_5(img90, utils::horizontal_gravity(img90)) << 3) +)) +{} + +unsigned gravity::relationship(const gravity& other) const +{ return utils::cardinal_relationship(d, other.d); } + +std::istream& operator>>(std::istream& is, gravity& d) +{ return is >> d.d; } + +std::ostream& operator<<(std::ostream& os, const gravity& d) +{ return os << d.id(); } + +} } diff --git a/projects/ppocr/ppocr/strategies/gravity.hpp b/projects/ppocr/ppocr/strategies/gravity.hpp new file mode 100644 index 0000000000..a060f9eafb --- /dev/null +++ b/projects/ppocr/ppocr/strategies/gravity.hpp @@ -0,0 +1,63 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_GRAVITY_HPP +#define PPOCR_STRATEGIES_GRAVITY_HPP + +#include + +#include "ppocr/strategies/utils/cardinal_direction.hpp" + +namespace ppocr { + +class Image; + +namespace strategies +{ + struct gravity + { + using cardinal_direction = utils::CardinalDirection2; + + gravity() = default; + + gravity(const Image & img, const Image & img90); + + gravity(cardinal_direction g) : d(g) {} + + bool operator<(gravity const & other) const + { return d < other.d; } + + bool operator==(gravity const & other) const + { return d == other.d; } + + unsigned relationship(const gravity& other) const; + + cardinal_direction id() const noexcept { return d; } + + friend std::istream & operator>>(std::istream &, gravity &); + + private: + cardinal_direction d = cardinal_direction::NONE; + }; + + std::ostream & operator<<(std::ostream &, gravity const &); +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/gravity2.cpp b/projects/ppocr/ppocr/strategies/gravity2.cpp new file mode 100644 index 0000000000..b6851f9343 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/gravity2.cpp @@ -0,0 +1,60 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/gravity2.hpp" +#include "ppocr/image/image.hpp" + +#include "ppocr/strategies/utils/relationship.hpp" +#include "ppocr/strategies/utils/horizontal_gravity.hpp" + +#include +#include + +#include + + +namespace ppocr {namespace strategies { + +static unsigned horizontal_gravity2(const Image& img) +{ + utils::TopBottom g = utils::horizontal_gravity(img); + + size_t const hby2 = img.height()/2; + int const gimg = ((hby2 * (hby2 + 1)) / 2) * img.width(); + unsigned ret = (gimg ? (int(g.top) - int(g.bottom)) * 100 / gimg : 0) + 100; + + assert(ret <= 200); + return ret; +} + +gravity2::gravity2(const Image& img, const Image& img90) +: d(horizontal_gravity2(img) | (horizontal_gravity2(img90) << 9)) +{ + static_assert(sizeof(int) > 9*3/9, "short type"); +} + +unsigned gravity2::relationship(const gravity2& other) const +{ return utils::mask_relationship(d, other.d, (1 << 9)-1, 9, 400); } + +std::istream& operator>>(std::istream& is, gravity2& d) +{ return is >> d.d; } + +std::ostream& operator<<(std::ostream& os, const gravity2& d) +{ return os << d.id(); } + +} } diff --git a/projects/ppocr/ppocr/strategies/gravity2.hpp b/projects/ppocr/ppocr/strategies/gravity2.hpp new file mode 100644 index 0000000000..0779d36771 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/gravity2.hpp @@ -0,0 +1,59 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_GRAVITY2_HPP +#define PPOCR_STRATEGIES_GRAVITY2_HPP + +#include + +namespace ppocr { + +class Image; + +namespace strategies +{ + struct gravity2 + { + gravity2() = default; + + gravity2(const Image & img, const Image & img90); + + gravity2(int d) : d(d) {} + + bool operator<(gravity2 const & other) const + { return d < other.d; } + + bool operator==(gravity2 const & other) const + { return d == other.d; } + + unsigned relationship(const gravity2& other) const; + + unsigned id() const noexcept { return d; } + + friend std::istream & operator>>(std::istream &, gravity2 &); + + private: + unsigned d = 0; + }; + + std::ostream & operator<<(std::ostream &, gravity2 const &); +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/hbar.cpp b/projects/ppocr/ppocr/strategies/hbar.cpp new file mode 100644 index 0000000000..3f55dd1921 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hbar.cpp @@ -0,0 +1,46 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/hbar.hpp" +#include "ppocr/image/image.hpp" + +#include + +namespace ppocr { namespace strategies { + +hbar::value_type hbar::load(const Image& img, const Image& /*img90*/) const +{ + hbar::value_type n{}; + auto p = img.data(); + auto const e = img.data_end(); + bool previous_is_plain = false; + for (; p != e; p += img.width()) { + if (std::all_of(p, p+img.width(), is_pix_letter_fn())) { + if (!previous_is_plain) { + previous_is_plain = true; + ++n; + } + } + else { + previous_is_plain = false; + } + } + return n; +} + +} } diff --git a/projects/ppocr/ppocr/strategies/hbar.hpp b/projects/ppocr/ppocr/strategies/hbar.hpp new file mode 100644 index 0000000000..a1b4b53dc9 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hbar.hpp @@ -0,0 +1,47 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_HBAR_HPP +#define PPOCR_SRC_STRATEGIES_HBAR_HPP + +#include "ppocr/strategies/relationship/equal.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct hbar +{ + using value_type = unsigned; + using relationship_type = equal_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + constexpr relationship_type relationship() const { return {}; } + constexpr unsigned best_difference() const { return 0; } +}; + +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/hdirection.cpp b/projects/ppocr/ppocr/strategies/hdirection.cpp new file mode 100644 index 0000000000..8399750d45 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hdirection.cpp @@ -0,0 +1,40 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr//strategies/hdirection.hpp" + +#include "ppocr/strategies/utils/horizontal_direction.hpp" + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace strategies { + +hdirection::value_type hdirection::load(Image const & img, Image const & /*img90*/) const +{ + auto const top_bottom = utils::horizontal_direction(img); + auto const sum = top_bottom.top + top_bottom.bottom; + return sum ? (sum + top_bottom.top - top_bottom.bottom) * 100 / sum : 100; +} + +hdirection::relationship_type hdirection::relationship() const +{ return {200}; } + +unsigned hdirection::best_difference() const +{ return 10; } + +} } diff --git a/projects/ppocr/ppocr/strategies/hdirection.hpp b/projects/ppocr/ppocr/strategies/hdirection.hpp new file mode 100644 index 0000000000..69d29c3907 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hdirection.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_HDIRECTION_HPP +#define PPOCR_SRC_STRATEGIES_HDIRECTION_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct hdirection +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/hdirection2.cpp b/projects/ppocr/ppocr/strategies/hdirection2.cpp new file mode 100644 index 0000000000..069f44fc21 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hdirection2.cpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/hdirection2.hpp" + +#include "ppocr/strategies/utils/horizontal_direction.hpp" + +#include "ppocr/image/image.hpp" + + +namespace ppocr { namespace strategies { + +hdirection2::value_type hdirection2::load(Image const & img, Image const & /*img90*/) const +{ + auto const area = img.height() / 2 * img.width(); + if (!area) { + return 50; + } + auto const top_bottom = utils::horizontal_direction(img); + auto const ret = (area + top_bottom.top - top_bottom.bottom) * 100 / (area * 2); + return ret; +} + +hdirection2::relationship_type hdirection2::relationship() const +{ return {100}; } + +unsigned hdirection2::best_difference() const +{ return 5; } + +} } diff --git a/projects/ppocr/ppocr/strategies/hdirection2.hpp b/projects/ppocr/ppocr/strategies/hdirection2.hpp new file mode 100644 index 0000000000..e330c52bc6 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hdirection2.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details.
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_HDIRECTION2_HPP +#define PPOCR_SRC_STRATEGIES_HDIRECTION2_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct hdirection2 +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/hgravity.cpp b/projects/ppocr/ppocr/strategies/hgravity.cpp new file mode 100644 index 0000000000..d89e2a6917 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hgravity.cpp @@ -0,0 +1,40 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/hgravity.hpp" + +#include "ppocr/strategies/utils/horizontal_gravity.hpp" + +#include "ppocr/image/image.hpp" + +namespace ppocr { namespace strategies { + +hgravity::value_type hgravity::load(Image const & img, Image const & /*img90*/) const +{ + auto const top_bottom = utils::horizontal_gravity(img); + auto const sum = top_bottom.top + top_bottom.bottom; + return sum ? (sum + top_bottom.top - top_bottom.bottom) * 100 / sum : 100; +} + +hgravity::relationship_type hgravity::relationship() const +{ return {200}; } + +unsigned hgravity::best_difference() const +{ return 10; } + +} } diff --git a/projects/ppocr/ppocr/strategies/hgravity.hpp b/projects/ppocr/ppocr/strategies/hgravity.hpp new file mode 100644 index 0000000000..103380b41e --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hgravity.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details.
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_HGRAVITY_HPP +#define PPOCR_SRC_STRATEGIES_HGRAVITY_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct hgravity +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/hgravity2.cpp b/projects/ppocr/ppocr/strategies/hgravity2.cpp new file mode 100644 index 0000000000..0f2e24ca60 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hgravity2.cpp @@ -0,0 +1,46 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/hgravity2.hpp" + +#include "ppocr/strategies/utils/horizontal_gravity.hpp" + +#include "ppocr/image/image.hpp" + + +namespace ppocr { namespace strategies { + +hgravity2::value_type hgravity2::load(Image const & img, Image const & /*img90*/) const +{ + auto const hby2 = img.height()/2; + if (!hby2) { + return 50; + } + auto const area = ((hby2 * (hby2 + 1)) / 2) * img.width(); + auto const top_bottom = utils::horizontal_gravity(img); + auto const ret = (area + top_bottom.top - top_bottom.bottom) * 100 / (area * 2); + return ret; +} + +hgravity2::relationship_type hgravity2::relationship() const +{ return {100}; } + +unsigned hgravity2::best_difference() const +{ return 5; } + +} } diff --git a/projects/ppocr/ppocr/strategies/hgravity2.hpp b/projects/ppocr/ppocr/strategies/hgravity2.hpp new file mode 100644 index 0000000000..e7e64e3d5f --- /dev/null +++ b/projects/ppocr/ppocr/strategies/hgravity2.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_HGRAVITY2_HPP +#define PPOCR_SRC_STRATEGIES_HGRAVITY2_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct hgravity2 +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/proportionality.cpp b/projects/ppocr/ppocr/strategies/proportionality.cpp new file mode 100644 index 0000000000..10fa2190dd --- /dev/null +++ b/projects/ppocr/ppocr/strategies/proportionality.cpp @@ -0,0 +1,34 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/proportionality.hpp" +#include "ppocr/image/image.hpp" + + +namespace ppocr { namespace strategies { + +proportionality::value_type proportionality::load(Image const & img, Image const & /*img90*/) const +{ return img.width() * 100 / (img.width() + img.height()); } + +proportionality::relationship_type proportionality::relationship() const +{ return {100}; } + +unsigned proportionality::best_difference() const +{ return 5; } + +} } diff --git a/projects/ppocr/ppocr/strategies/proportionality.hpp b/projects/ppocr/ppocr/strategies/proportionality.hpp new file mode 100644 index 0000000000..110396005e --- /dev/null +++ b/projects/ppocr/ppocr/strategies/proportionality.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_PROPORTIONALITY_HPP +#define PPOCR_STRATEGIES_PROPORTIONALITY_HPP + +#include "ppocr/strategies/relationship/interval.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct proportionality +{ + using value_type = unsigned; + using relationship_type = interval_relationship; + + static constexpr bool one_axis = true; + + value_type load(Image const & img, Image const & /*img90*/) const; + + relationship_type relationship() const; + unsigned best_difference() const; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/proportionality_zone.cpp b/projects/ppocr/ppocr/strategies/proportionality_zone.cpp new file mode 100644 index 0000000000..3ba7d79d15 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/proportionality_zone.cpp @@ -0,0 +1,102 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/proportionality_zone.hpp" +#include "ppocr/image/image.hpp" +#include "ppocr/strategies/utils/count_zone.hpp" +#include "ppocr/strategies/utils/relationship.hpp" + +#include +#include +#include + +namespace ppocr { namespace strategies { + +proportionality_zone::value_type proportionality_zone::load(Image const & img, Image const & /*img90*/) const +{ + proportionality_zone::value_type ret; + + utils::ZoneInfo zone_info = utils::count_zone(img); + + for (unsigned i = 0; i < zone_info.len; ++i) { + if (zone_info.top()[i]) { + // skip -- NOTE(review): the next test is a plain 'if', not 'else if', so right/bottom/left still overwrite a non-zero top count; confirm intent before changing it, previously computed feature values may depend on the current behaviour + } + if (zone_info.right()[i]) { + zone_info.top()[i] = zone_info.right()[i]; + } + else if (zone_info.bottom()[i]) { + zone_info.top()[i] = zone_info.bottom()[i]; + } + else if (zone_info.left()[i]) { + zone_info.top()[i] = zone_info.left()[i]; + } + } + + unsigned area = 0; + for (auto n : zone_info.top()) { + area += n; + } + + for (auto n : zone_info.top()) { + if (n) { + ret.push_back(n * 100 / area); + } + } + + std::sort(ret.begin(), ret.end()); + + return ret; +} + +unsigned proportionality_zone::relationship_type::operator()(const value_type& a, const value_type& b) const +{ + if (a.size() != b.size()) { + return 0; + } + if (a.empty()) { + return 100; + } + unsigned const total = std::inner_product( + a.begin(), a.end(), b.begin(), 0u + , std::plus() + , [](unsigned a, unsigned b) { return (a+b) ? a*100/(a+b) : 50u; } /* percentages can truncate to 0; 0 against 0 counts as equal shares instead of dividing by zero */ + ) / a.size(); + return utils::compute_relationship(total, 50u, 100u); +} + +std::size_t proportionality_zone::relationship_type::count() const +{ return 101; } + + +double proportionality_zone::relationship_type::dist( + proportionality_zone::relationship_type::value_type const & a, + proportionality_zone::relationship_type::value_type const & b +) const { + return
static_cast(operator()(a, b)) / 100.; +} + +bool proportionality_zone::relationship_type::in_dist( + proportionality_zone::relationship_type::value_type const & a, + proportionality_zone::relationship_type::value_type const & b, + unsigned d +) const { + return operator()(a, b) >= d; +} + +} } diff --git a/projects/ppocr/ppocr/strategies/proportionality_zone.hpp b/projects/ppocr/ppocr/strategies/proportionality_zone.hpp new file mode 100644 index 0000000000..c77a956b9b --- /dev/null +++ b/projects/ppocr/ppocr/strategies/proportionality_zone.hpp @@ -0,0 +1,57 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_PROPORTIONALITY_ZONE_HPP +#define PPOCR_SRC_STRATEGIES_PROPORTIONALITY_ZONE_HPP + +#include + +namespace ppocr { + +class Image; + +namespace strategies { + +struct proportionality_zone { + struct relationship_type { + using value_type = std::vector; + using result_type = unsigned; + + constexpr relationship_type() noexcept {} + + result_type operator()(value_type const & a, value_type const & b) const; + + /// \return [0, 1] + double dist(value_type const & a, value_type const & b) const; + + bool in_dist(value_type const & a, value_type const & b, unsigned d) const; + + std::size_t count() const; + }; + using value_type = relationship_type::value_type; + + value_type load(Image const & img, Image const & /*img90*/) const; + + static constexpr bool one_axis() { return false; } + constexpr relationship_type relationship() const { return {}; } + constexpr unsigned best_difference() const { return 20u; } +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/relationship/array_compare.hpp b/projects/ppocr/ppocr/strategies/relationship/array_compare.hpp new file mode 100644 index 0000000000..affa46080e --- /dev/null +++ b/projects/ppocr/ppocr/strategies/relationship/array_compare.hpp @@ -0,0 +1,59 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_RELATIONSHIP_ARRAY_COMPARE_RELATIONSHIP_HPP +#define PPOCR_SRC_STRATEGIES_RELATIONSHIP_ARRAY_COMPARE_RELATIONSHIP_HPP + +#include + +namespace ppocr { namespace strategies { + +template +struct array_compare_relationship +{ + using value_type = std::array; + using result_type = R; + + constexpr array_compare_relationship() noexcept {} + + result_type operator()(value_type const & a, value_type const & b) const + { + R n{}; + auto it = std::begin(a); + for (auto const & i : b) { + if (*it == i) { + ++n; + } + ++it; + } + return R(n * R{100} / a.size()); + } + + /// \return [0, 1] + double dist(value_type const & a, value_type const & b) const + { return static_cast(operator()(a, b)) / 100.; } + + bool in_dist(value_type const & a, value_type const & b, unsigned d) const + { return static_cast(operator()(a, b)) >= d; } + + std::size_t count() const { return 101; } +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/relationship/equal.hpp b/projects/ppocr/ppocr/strategies/relationship/equal.hpp new file mode 100644 index 0000000000..c8dbc1096c --- /dev/null +++ b/projects/ppocr/ppocr/strategies/relationship/equal.hpp @@ -0,0 +1,51 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_RELATIONSHIP_EQUAL_RELATIONSHIP_HPP +#define PPOCR_SRC_STRATEGIES_RELATIONSHIP_EQUAL_RELATIONSHIP_HPP + +#include + +namespace ppocr { namespace strategies { + +template +struct equal_relationship +{ + using value_type = T; + using result_type = bool; + + constexpr static bool is_contiguous = true; + + constexpr equal_relationship() noexcept {} + + result_type operator()(value_type const & a, value_type const & b) const + { return a == b; } + + /// \return [0, 1] + double dist(value_type const & a, value_type const & b) const + { return a == b ? 1. : 0.; } + + bool in_dist(value_type const & a, value_type const & b, value_type const &) const + { return a == b; } + + std::size_t count() const { return 2; } +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/relationship/interval.hpp b/projects/ppocr/ppocr/strategies/relationship/interval.hpp new file mode 100644 index 0000000000..e09649c768 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/relationship/interval.hpp @@ -0,0 +1,56 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_RELATIONSHIP_INTERVAL_RELATIONSHIP_HPP +#define PPOCR_SRC_STRATEGIES_RELATIONSHIP_INTERVAL_RELATIONSHIP_HPP + +#include "ppocr/strategies/utils/relationship.hpp" + +namespace ppocr { namespace strategies { + +template +struct interval_relationship +{ + using value_type = T; + using result_type = R; + + constexpr static bool is_contiguous = true; + + constexpr interval_relationship(T const & interval) noexcept + : interval_(interval) + {} + + result_type operator()(value_type const & a, value_type const & b) const + { return utils::compute_relationship(a, b, interval_); } + + /// \return [0, 1] + double dist(value_type const & a, value_type const & b) const + { return static_cast(operator()(a, b)) / 100.; } + + bool in_dist(value_type const & a, value_type const & b, value_type const & d) const + { return (a < b ? (b > a + d) : (a > b + d)); } + + std::size_t count() const { return std::size_t(this->interval_) + 1; } + +private: + value_type interval_; +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/basic_proportionality.cpp b/projects/ppocr/ppocr/strategies/utils/basic_proportionality.cpp new file mode 100644 index 0000000000..f99f772880 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/basic_proportionality.cpp @@ -0,0 +1,50 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/utils/basic_proportionality.hpp" +#include "ppocr/strategies/utils/relationship.hpp" +#include + +#include +#include + + +namespace ppocr { namespace strategies { + +void details_::check_interval(unsigned x, unsigned interval) +{ + assert(x <= interval); + (void)x; + (void)interval; +} + +unsigned proportionality_base::relationship(const proportionality_base& other, unsigned interval) const +{ return utils::compute_relationship(value(), other.value(), interval); } + +std::istream& operator>>(std::istream& is, proportionality_base & prop) +{ + decltype(prop.value()) proportion; + is >> proportion; + prop = proportionality_base(proportion); + return is; +} + +std::ostream& operator<<(std::ostream& os, const proportionality_base & prop) +{ return os << prop.value(); } + +} } diff --git a/projects/ppocr/ppocr/strategies/utils/basic_proportionality.hpp b/projects/ppocr/ppocr/strategies/utils/basic_proportionality.hpp new file mode 100644 index 0000000000..4e17809e1f --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/basic_proportionality.hpp @@ -0,0 +1,84 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_UTILS_BASIC_PROPORTIONALITY_HPP +#define PPOCR_SRC_STRATEGIES_UTILS_BASIC_PROPORTIONALITY_HPP + +#include + + +namespace ppocr { + +class Image; + +namespace strategies { + +struct proportionality_base +{ + proportionality_base() = default; + proportionality_base(unsigned proportion) : proportion_(proportion) {} + + bool operator<(proportionality_base const & other) const + { return proportion_ < other.proportion_; } + + bool operator==(proportionality_base const & other) const + { return this->proportion_ == other.proportion_; } + + unsigned value() const noexcept { return proportion_; } + + friend std::istream & operator>>(std::istream &, proportionality_base &); + +protected: + unsigned relationship(proportionality_base const & other, unsigned interval) const; + +private: + unsigned proportion_ = 0; +}; + +std::ostream & operator<<(std::ostream &, proportionality_base const &); + +namespace details_ { + void check_interval(unsigned x, unsigned interval); +} + +template +struct basic_proportionality : proportionality_base +{ + using traits = Traits; + + basic_proportionality(unsigned proportion) + : proportionality_base(proportion) + { details_::check_interval(proportion, traits::get_interval()); } + + basic_proportionality() = default; + + basic_proportionality(const Image & img, const Image & img90) + : proportionality_base(traits::compute(img, img90)) + {} + + unsigned relationship(basic_proportionality const & other) const + { return 
proportionality_base::relationship(other, traits::get_interval()); } + + unsigned best_difference() const { return Traits::best_difference(); } +}; + +} + +} + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/cardinal_direction.hpp b/projects/ppocr/ppocr/strategies/utils/cardinal_direction.hpp new file mode 100644 index 0000000000..6b2b6988a3 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/cardinal_direction.hpp @@ -0,0 +1,68 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_UTILS_CARDINAL_DIRECTION_HPP +#define PPOCR_STRATEGIES_UTILS_CARDINAL_DIRECTION_HPP + +namespace ppocr { namespace strategies { namespace utils { + +enum class CardinalDirection : unsigned +{ + NONE = 2 + (2 << 2), + SOUTH = 1 + (2 << 2), + NORTH = 3 + (2 << 2), + WEST = 2 + (1 << 2), + EAST = 2 + (3 << 2), + NORTH_EAST = 3 + (3 << 2), + NORTH_WEST = 3 + (1 << 2), + SOUTH_EAST = 1 + (3 << 2), + SOUTH_WEST = 1 + (1 << 2), +}; + +enum class CardinalDirection2 : unsigned +{ + NONE = 3 + (3 << 3), + NORTH2 = 5 + (3 << 3), + NORTH = 4 + (3 << 3), + SOUTH = 2 + (3 << 3), + SOUTH2 = 1 + (3 << 3), + EAST2 = 3 + (5 << 3), + EAST = 3 + (4 << 3), + WEST = 3 + (2 << 3), + WEST2 = 3 + (1 << 3), + NORTH_EAST = (NORTH & 7) | (EAST & (7<<3)), + NORTH2_EAST = (NORTH2 & 7) | (EAST & (7<<3)), + NORTH_EAST2 = (NORTH & 7) | (EAST2 & (7<<3)), + NORTH2_EAST2 = (NORTH2 & 7) | (EAST2 & (7<<3)), + NORTH_WEST = (NORTH & 7) | (WEST & (7<<3)), + NORTH2_WEST = (NORTH2 & 7) | (WEST & (7<<3)), + NORTH_WEST2 = (NORTH & 7) | (WEST2 & (7<<3)), + NORTH2_WEST2 = (NORTH2 & 7) | (WEST2 & (7<<3)), + SOUTH_EAST = (SOUTH & 7) | (EAST & (7<<3)), + SOUTH2_EAST = (SOUTH2 & 7) | (EAST & (7<<3)), + SOUTH_EAST2 = (SOUTH & 7) | (EAST2 & (7<<3)), + SOUTH2_EAST2 = (SOUTH2 & 7) | (EAST2 & (7<<3)), + SOUTH_WEST = (SOUTH & 7) | (WEST & (7<<3)), + SOUTH2_WEST = (SOUTH2 & 7) | (WEST & (7<<3)), + SOUTH_WEST2 = (SOUTH & 7) | (WEST2 & (7<<3)), + SOUTH2_WEST2 = (SOUTH2 & 7) | (WEST2 & (7<<3)), +}; + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/cardinal_direction_io.hpp b/projects/ppocr/ppocr/strategies/utils/cardinal_direction_io.hpp new file mode 100644 index 0000000000..bb1a63f3ce --- /dev/null +++ 
b/projects/ppocr/ppocr/strategies/utils/cardinal_direction_io.hpp @@ -0,0 +1,56 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_UTILS_CARDINAL_DIRECTION_IO_HPP +#define PPOCR_STRATEGIES_UTILS_CARDINAL_DIRECTION_IO_HPP + +#include +#include +#include + +#include "ppocr/strategies/utils/cardinal_direction.hpp" + + +namespace ppocr { namespace strategies { namespace utils { + +inline std::ostream & operator<<(std::ostream & os, CardinalDirection d) +{ return os << static_cast::type>(d); } + +inline std::istream & operator>>(std::istream & is, CardinalDirection & d) +{ + std::underlying_type::type i; + is >> i; + d = static_cast(i); + return is; +} + +inline std::ostream & operator<<(std::ostream & os, CardinalDirection2 d) +{ return os << static_cast::type>(d); } + +inline std::istream & operator>>(std::istream & is, CardinalDirection2 & d) +{ + std::underlying_type::type i; + is >> i; + d = static_cast(i); + return is; +} + +} } } + + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/count_zone.hpp b/projects/ppocr/ppocr/strategies/utils/count_zone.hpp new file mode 100644 index 0000000000..f2b0708b49 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/count_zone.hpp @@ -0,0 +1,141 @@ +/* +* Copyright (C) 2016 
Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_UTILS_COUNT_ZONE_HPP +#define PPOCR_SRC_STRATEGIES_UTILS_COUNT_ZONE_HPP + +#include "ppocr/image/image.hpp" + +#include +#include + +namespace ppocr { namespace strategies { namespace utils +{ + struct unsigned_array_view + { + unsigned* data; + unsigned len; + + unsigned* begin() const { return data; } + unsigned* end() const { return data + len; } + + unsigned count_non_zero() const + { + unsigned n = 0; + for (unsigned x : *this) { + n += x ? 
1 : 0; + } + return n; + } + + unsigned& operator[](unsigned i) + { + assert(i < len); + return data[i]; + } + }; + + struct ZoneInfo { + std::unique_ptr stack; + unsigned len = 0; + unsigned count_zone = 1; + + unsigned_array_view top() { return {stack.get(), len}; } + unsigned_array_view right() { return {top().end(), len}; } + unsigned_array_view bottom() { return {right().end(), len}; } + unsigned_array_view left() { return {bottom().end(), len}; } + + void alloc(unsigned n) + { + len = n; /* top(), right(), bottom() and left() are four consecutive len-sized views over stack, so 4 * n value-initialised counters are required */ + stack = std::make_unique<unsigned[]>(n * 4); + } + }; + + inline ZoneInfo count_zone(const Image& img) { + ZoneInfo zone; + std::vector mirror(img.area() * 2, 0); + unsigned* const stack = mirror.data() + img.area(); + + for (unsigned i = 0; i < img.area(); ++i) { + if (mirror[i] || is_pix_letter(img.data()[i])) { + continue; + } + + unsigned idx = i; + auto stack_it = stack; + mirror[idx] = zone.count_zone; + for (;;) { + auto x = idx % img.width(); + auto y = idx / img.width(); + + auto push_if = [&](unsigned idx){ + if (!mirror[idx] && !is_pix_letter(img.data()[idx])) { + mirror[idx] = zone.count_zone; + *stack_it++ = idx; + } + }; + + if (y != 0) { + push_if(idx - img.width()); + } + if (y + 1 != img.height()) { + push_if(idx + img.width()); + } + if (x != 0) { + push_if(idx - 1); + } + if (x + 1 != img.width()) { + push_if(idx + 1); + } + + if (stack_it == stack) { + break; + } + + idx = *--stack_it; + } + + zone.count_zone++; + } + + zone.alloc(zone.count_zone - 1); + + auto insert = [&](unsigned_array_view av, size_t x, size_t y) { + auto i = img.to_size_t({x, y}); + if (mirror[i]) { + ++av[mirror[i] - 1]; + } + }; + + for (size_t x = 0; x < img.width(); ++x) { + insert(zone.top(), x, 0); + insert(zone.bottom(), x, img.height()-1); + } + + for (size_t y = 0; y < img.height(); ++y) { + insert(zone.left(), 0, y); + insert(zone.right(), img.width()-1, y); + } + + return zone; + } + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/diagonal_vertical_direction.hpp
b/projects/ppocr/ppocr/strategies/utils/diagonal_vertical_direction.hpp new file mode 100644 index 0000000000..6bd9326291 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/diagonal_vertical_direction.hpp @@ -0,0 +1,98 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_UTILS_DIAGONAL_VERTICAL_DIRECTION_HPP +#define PPOCR_SRC_STRATEGIES_UTILS_DIAGONAL_VERTICAL_DIRECTION_HPP + +#include "ppocr/image/image.hpp" +#include "ppocr/strategies/utils/top_bottom.hpp" + +#include + +namespace ppocr { namespace strategies { namespace utils { + +namespace details_ { + inline unsigned count_diagonal_vertical_direction( + Bounds const & bnd, Pixel const * p, Pixel const * ep, bool is_top + ) { + unsigned d = 0; + size_t ih = 0; + size_t const wdiv2 = bnd.w()/2; + for (; p != ep; p += bnd.w(), ++ih) { + size_t x = wdiv2 - bnd.w() / (!is_top ? 
bnd.h() - ih : 1 + ih) / 2; + auto leftp = p + x; + auto rightp = p + bnd.w() - x; + for (; leftp != rightp; ++leftp) { + if (is_pix_letter(*leftp)) { + ++d; + } + } + } + return d; + } + + inline unsigned count_diagonal_vertical_area( + Bounds const & bnd, Pixel const * p, Pixel const * ep, bool is_top + ) { + unsigned d = 0; + size_t ih = 0; + size_t const wdiv2 = bnd.w()/2; + for (; p != ep; p += bnd.w(), ++ih) { + size_t x = wdiv2 - bnd.w() / (!is_top ? bnd.h() - ih : 1 + ih) / 2; + d += bnd.w() - x*2; + } + return d; + } +} + +inline unsigned diagonal_vertical_direction_area(const Image& img) +{ + Bounds const bnd(img.width(), img.height() / 2); + auto p = img.data(); + auto ep = img.data({0, bnd.h()}); + + unsigned area = 0; + size_t ih = 0; + size_t const wdiv2 = bnd.w()/2; + for (; p != ep; p += bnd.w(), ++ih) { + size_t x = wdiv2 - bnd.w() / (1 + ih) / 2; + area += bnd.w() - x*2; + } + return area * 2; +} + +inline +TopBottom diagonal_vertical_direction(const Image& img) +{ + Bounds const bnd(img.width(), img.height() / 2); + auto p = img.data(); + auto ep = img.data({0, bnd.h()}); + auto const top = details_::count_diagonal_vertical_direction(bnd, p, ep, true); + + p = ep; + if (img.height() & 1) { + p += img.width(); + } + auto const bottom = details_::count_diagonal_vertical_direction(bnd, p, img.data_end(), false); + + return {top, bottom}; +} + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/diagonal_vertical_gravity.hpp b/projects/ppocr/ppocr/strategies/utils/diagonal_vertical_gravity.hpp new file mode 100644 index 0000000000..fd04e3b2fd --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/diagonal_vertical_gravity.hpp @@ -0,0 +1,85 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later 
version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_UTILS_DIAGONAL_VERTICAL_GRAVITY_HPP +#define PPOCR_SRC_STRATEGIES_UTILS_DIAGONAL_VERTICAL_GRAVITY_HPP + +#include "ppocr/image/image.hpp" +#include "ppocr/strategies/utils/top_bottom.hpp" + +namespace ppocr { namespace strategies { namespace utils { + +namespace details_ { + inline unsigned count_diagonal_vertical_gravity( + Bounds const & bnd, Pixel const * p, Pixel const * ep, bool is_top + ) { + unsigned g = 0; + unsigned coef = is_top ? bnd.h() : 1; + size_t ih = 0; + size_t const wdiv2 = bnd.w()/2; + for (; p != ep; p += bnd.w(), ++ih) { + size_t x = wdiv2 - bnd.w() / (!is_top ? bnd.h() - ih : 1 + ih) / 2; + auto leftp = p + x; + auto rightp = p + bnd.w() - x; + for (; leftp != rightp; ++leftp) { + if (is_pix_letter(*leftp)) { + g += coef; + } + } + (is_top ? 
--coef : ++coef); + } + return g; + } +} + +inline unsigned diagonal_vertical_gravity_area(const Image& img) +{ + Bounds const bnd(img.width(), img.height() / 2); + auto p = img.data(); + auto ep = img.data({0, bnd.h()}); + + unsigned area = 0; + size_t ih = 0; + size_t const wdiv2 = bnd.w()/2; + for (; p != ep; p += bnd.w(), ++ih) { + size_t x = wdiv2 - bnd.w() / (bnd.h() - ih) / 2; + area += (bnd.w() - x*2) * (ih+1); + } + return area * 2; +} + +inline +TopBottom diagonal_vertical_gravity(const Image& img) +{ + Bounds const bnd(img.width(), img.height() / 2); + auto p = img.data(); + auto ep = img.data({0, bnd.h()}); + auto const top = details_::count_diagonal_vertical_gravity(bnd, p, ep, true); + + p = ep; + if (img.height() & 1) { + p += img.width(); + } + auto const bottom = details_::count_diagonal_vertical_gravity(bnd, p, img.data_end(), false); + + return {top, bottom}; +} + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/horizontal_direction.hpp b/projects/ppocr/ppocr/strategies/utils/horizontal_direction.hpp new file mode 100644 index 0000000000..7b7a7d9062 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/horizontal_direction.hpp @@ -0,0 +1,52 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_UTILS_HORIZONTAL_DIRECTION_HPP +#define PPOCR_STRATEGIES_UTILS_HORIZONTAL_DIRECTION_HPP + +#include "ppocr/image/image.hpp" +#include "ppocr/strategies/utils/top_bottom.hpp" + +namespace ppocr { namespace strategies { namespace utils { + +inline +TopBottom horizontal_direction(const Image& img) +{ + unsigned top = 0; + unsigned bottom = 0; + auto p = img.data(); + for (auto ep = img.data({0, img.height() / 2}); p != ep; ++p) { + if (is_pix_letter(*p)) { + ++top; + } + } + if (img.height() & 1) { + p += img.width(); + } + for (auto ep = img.data_end(); p != ep; ++p) { + if (is_pix_letter(*p)) { + ++bottom; + } + } + + return {top, bottom}; +} + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/horizontal_gravity.hpp b/projects/ppocr/ppocr/strategies/utils/horizontal_gravity.hpp new file mode 100644 index 0000000000..77d843a023 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/horizontal_gravity.hpp @@ -0,0 +1,70 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_UTILS_HORIZONTAL_GRAVITY_HPP +#define PPOCR_STRATEGIES_UTILS_HORIZONTAL_GRAVITY_HPP + +#include "ppocr/image/image.hpp" +#include "ppocr/strategies/utils/top_bottom.hpp" + + +namespace ppocr { namespace strategies { namespace utils { + +/** + * Multiply each pixel with the distance [\a bnd.h(), y] + */ +inline +unsigned horizontal_box_gravity(Bounds const & bnd, Pixel const * p, Pixel const * ep, bool is_top) +{ + unsigned /*long*/ g = 0; + unsigned h = is_top ? bnd.h() : 1; + while (p != ep) { + for (auto epl = p + bnd.w(); p != epl; ++p) { + if (is_pix_letter(*p)) { + g += h; + } + } + + if (is_top) { + --h; + } + else { + ++h; + } + } + return g; +} + +inline TopBottom horizontal_gravity(const Image& img) +{ + Bounds const bnd(img.width(), img.height() / 2); + auto p = img.data(); + auto ep = img.data({0, bnd.h()}); + unsigned const top = horizontal_box_gravity(bnd, p, ep, true); + p = ep; + if (img.height() & 1) { + p += img.width(); + } + unsigned const bottom = horizontal_box_gravity(bnd, p, img.data_end(), false); + + return {top, bottom}; +} + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/horizontal_zone.hpp b/projects/ppocr/ppocr/strategies/utils/horizontal_zone.hpp new file mode 100644 index 0000000000..a30c4da0a2 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/horizontal_zone.hpp @@ -0,0 +1,49 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_UTILS_HORIZONTAL_ZONE_HPP +#define PPOCR_STRATEGIES_UTILS_HORIZONTAL_ZONE_HPP + +#include "ppocr/image/image.hpp" +#include "ppocr/strategies/utils/top_bottom.hpp" + +namespace ppocr { namespace strategies { namespace utils { + +inline unsigned horizontal_box_zone_0_1_2(const Image& img, unsigned d) +{ + if (d > img.height() / 4) { + if (d > img.height()) { + return 2; + } + return 1; + } + return 0; +} + +inline unsigned horizontal_zone_1_2_3_4_5(const Image& img, TopBottom const & top_bottom) +{ + return (top_bottom.top > top_bottom.bottom) + ? 3 + utils::horizontal_box_zone_0_1_2(img, top_bottom.top - top_bottom.bottom) + : (top_bottom.top < top_bottom.bottom) + ? 3 - utils::horizontal_box_zone_0_1_2(img, top_bottom.bottom - top_bottom.top) + : 3; +} + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/relationship.hpp b/projects/ppocr/ppocr/strategies/utils/relationship.hpp new file mode 100644 index 0000000000..ca51a554e4 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/relationship.hpp @@ -0,0 +1,67 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_UTILS_RELATIONSHIP_HPP +#define PPOCR_UTILS_RELATIONSHIP_HPP + +#include + +#include "ppocr/strategies/utils/cardinal_direction.hpp" +#include + + +namespace ppocr { namespace strategies { namespace utils { + +template +unsigned compute_relationship(T const & a, T const & b, U const & interval) +{ + using integer = typename std::conditional::value, int, unsigned>::type; + auto d = integer{100} - ((a < b) ? b-a : a-b) * integer{100} / interval; + assert(0 <= d && d <= 100); + return unsigned(d); +} + +inline +unsigned mask_relationship(unsigned a, unsigned b, unsigned mask, unsigned rbit, unsigned interval) +{ + unsigned const a1 = a & mask; + unsigned const a2 = b & mask; + unsigned const b1 = (a & (mask << rbit)) >> rbit; + unsigned const b2 = (b & (mask << rbit)) >> rbit; + + unsigned dist = 100 - ( + (a1 < a2 ? a2 - a1 : a1 - a2) + + (b1 < b2 ? 
b2 - b1 : b1 - b2) + ) * 50 / interval; + assert(dist <= 100); + return dist; +} + + +inline +unsigned cardinal_relationship(CardinalDirection a, CardinalDirection b) +{ return mask_relationship(static_cast(a), static_cast(b), 3, 2, 2); } + +inline +unsigned cardinal_relationship(CardinalDirection2 a, CardinalDirection2 b) +{ return mask_relationship(static_cast(a), static_cast(b), 7, 3, 4); } + + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/utils/top_bottom.hpp b/projects/ppocr/ppocr/strategies/utils/top_bottom.hpp new file mode 100644 index 0000000000..b6735321b0 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/utils/top_bottom.hpp @@ -0,0 +1,28 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_STRATEGIES_UTILS_TOP_BOTTOM_HPP +#define PPOCR_STRATEGIES_UTILS_TOP_BOTTOM_HPP + +namespace ppocr { namespace strategies { namespace utils { + +struct TopBottom { unsigned top, bottom; }; + +} } } + +#endif diff --git a/projects/ppocr/ppocr/strategies/zone.cpp b/projects/ppocr/ppocr/strategies/zone.cpp new file mode 100644 index 0000000000..2e20a3b846 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/zone.cpp @@ -0,0 +1,48 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/strategies/zone.hpp" +#include "ppocr/image/image.hpp" +#include "ppocr/strategies/utils/count_zone.hpp" + + +namespace ppocr { namespace strategies { + +zone::value_type zone::load(const Image& img, const Image& /*img*/) const +{ + utils::ZoneInfo zone_info = utils::count_zone(img); + + zone::value_type ret; + + ret[top_left_is_letter] = is_pix_letter(img[{0, 0}]); + ret[bottom_right_is_letter] = is_pix_letter(img[{img.width()-1, img.height()-1}]); + ret[number_top_alternations] = zone_info.top().count_non_zero(); + ret[number_right_alternations] = zone_info.right().count_non_zero(); + ret[number_bottom_alternations] = zone_info.bottom().count_non_zero(); + ret[number_left_alternations] = zone_info.left().count_non_zero(); + + for (unsigned i = 0; i < zone_info.len; ++i) { + zone_info.top()[i] += zone_info.right()[i] + zone_info.bottom()[i] + zone_info.left()[i]; + } + + ret[number_internal_alternations] = zone_info.count_zone - 1 - zone_info.top().count_non_zero(); + + return ret; +} + +} } diff --git a/projects/ppocr/ppocr/strategies/zone.hpp b/projects/ppocr/ppocr/strategies/zone.hpp new file mode 100644 index 0000000000..371f5431d3 --- /dev/null +++ b/projects/ppocr/ppocr/strategies/zone.hpp @@ -0,0 +1,57 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_STRATEGIES_ZONE_HPP +#define PPOCR_SRC_STRATEGIES_ZONE_HPP + +#include + +#include "ppocr/strategies/relationship/array_compare.hpp" + +namespace ppocr { + +class Image; + +namespace strategies { + +struct zone { + enum { + top_left_is_letter, + bottom_right_is_letter, + number_top_alternations, + number_right_alternations, + number_bottom_alternations, + number_left_alternations, + number_internal_alternations, + number_index + }; + + using relationship_type = array_compare_relationship; + using value_type = relationship_type::value_type; + + static constexpr bool one_axis = false; + + value_type load(const Image & img, const Image & /*img90*/) const; + + constexpr relationship_type relationship() const { return {}; } + constexpr unsigned best_difference() const { return 20; } +}; + +} } + +#endif diff --git a/projects/ppocr/ppocr/utils/enum_io.hpp b/projects/ppocr/ppocr/utils/enum_io.hpp new file mode 100644 index 0000000000..2302bf169d --- /dev/null +++ b/projects/ppocr/ppocr/utils/enum_io.hpp @@ -0,0 +1,62 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_UTILS_ENUM_IO_HPP +#define PPOCR_SRC_UTILS_ENUM_IO_HPP + +#include +#include +#include + + +namespace ppocr { namespace utils { + +template +inline std::ostream & write_enum(std::ostream & os, EnumT e) +{ return os << static_cast::type>(e); } + +template +inline std::istream & read_enum(std::istream & is, EnumT & e) +{ + typename std::underlying_type::type i; + is >> i; + e = static_cast(i); + return is; +} + +namespace details { + + template struct ReadEnum { E & e; }; + template struct WriteEnum { E e; }; + + template + inline std::istream & operator>>(std::istream & is, ReadEnum e) + { return read_enum(is, e.e); } + + template + inline std::ostream & operator<<(std::ostream & os, WriteEnum e) + { return write_enum(os, e.e); } + +} + +template inline details::WriteEnum write_enum(E e) { return {e}; } +template inline details::ReadEnum read_enum(E & e) { return {e}; } + +} } + +#endif diff --git a/projects/ppocr/ppocr/utils/image_compare.cpp b/projects/ppocr/ppocr/utils/image_compare.cpp new file mode 100644 index 0000000000..d1c932a7f3 --- /dev/null +++ b/projects/ppocr/ppocr/utils/image_compare.cpp @@ -0,0 +1,48 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/utils/image_compare.hpp" +#include "ppocr/image/image.hpp" + +#include + +namespace ppocr { + +int image_compare(Image const & a, Image const & b) +{ + if (a.width() < b.width()) { + return -1; + } + if (a.width() > b.width()) { + return 1; + } + if (a.height() < b.height()) { + return -1; + } + if (a.height() > b.height()) { + return 1; + } + + auto const pair = std::mismatch(a.data(), a.data_end(), b.data()); + if (pair.first == a.data_end()) { + return 0; + } + return (*pair.first < *pair.second) ? -1 : 1; +} + +} diff --git a/projects/ppocr/ppocr/utils/image_compare.hpp b/projects/ppocr/ppocr/utils/image_compare.hpp new file mode 100644 index 0000000000..0cecfe07ea --- /dev/null +++ b/projects/ppocr/ppocr/utils/image_compare.hpp @@ -0,0 +1,49 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_UTILS_IMAGE_COMPARE_HPP +#define PPOCR_UTILS_IMAGE_COMPARE_HPP + +#include + +namespace ppocr { + +class Image; + +int image_compare(Image const & a, Image const & b); + +} + +namespace std { + template<> + struct less< ::ppocr::Image> + { + constexpr less() noexcept {} + + bool operator()(::ppocr::Image const & a, ::ppocr::Image const & b) const + { return ::ppocr::image_compare(a, b) < 0; } + }; +} + +namespace ppocr { + +using image_less = std::less; + +} + +#endif diff --git a/projects/ppocr/ppocr/utils/integer_iterator.hpp b/projects/ppocr/ppocr/utils/integer_iterator.hpp new file mode 100644 index 0000000000..57283ff36e --- /dev/null +++ b/projects/ppocr/ppocr/utils/integer_iterator.hpp @@ -0,0 +1,49 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#pragma once + +namespace ppocr +{ + template + struct integer_iterator + { + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = void; + using const_reference = value_type const &; + using reference = const_reference; + + value_type n; + integer_iterator & operator++() { ++n; return *this; } + reference operator*() const { return n; } + difference_type operator-(integer_iterator const & other) const { return this->n - other.n; } + + integer_iterator& operator+=(difference_type n) { this->n += n; return *this; } + integer_iterator& operator-=(difference_type n) { this->n -= n; return *this; } + + integer_iterator operator+(difference_type n) const { return integer_iterator{T(this->n + n)}; } + integer_iterator operator-(difference_type n) const { return integer_iterator{T(this->n - n)}; } + + bool operator==(integer_iterator const & other) const { return this->n == other.n; } + bool operator!=(integer_iterator const & other) const { return this->n != other.n; } + bool operator<(integer_iterator const & other) const { return this->n < other.n; } + bool operator<=(integer_iterator const & other) const { return this->n <= other.n; } /* inclusive comparison: equal positions must compare true */ + }; +} diff --git a/projects/ppocr/ppocr/utils/make_unique.hpp b/projects/ppocr/ppocr/utils/make_unique.hpp new file mode 100644 index 0000000000..a72eef8ed3 --- /dev/null +++ b/projects/ppocr/ppocr/utils/make_unique.hpp @@ -0,0 +1,57 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version.
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_UTILS_MAKE_UNIQUE_HPP +#define PPOCR_SRC_UTILS_MAKE_UNIQUE_HPP + +#include + +#if __cplusplus == 201103L +namespace std { + template + struct _MakeUniq + { typedef unique_ptr<_Tp> __single_object; }; + + template + struct _MakeUniq<_Tp[]> + { typedef unique_ptr<_Tp[]> __array; }; + + template + struct _MakeUniq<_Tp[_Bound]> + { struct __invalid_type { }; }; + + /// std::make_unique for single objects + template + inline typename _MakeUniq<_Tp>::__single_object + make_unique(_Args&&... __args) + { return unique_ptr<_Tp>(new _Tp(std::forward<_Args>(__args)...)); } + + /// std::make_unique for arrays of unknown bound + template + inline typename _MakeUniq<_Tp>::__array + make_unique(size_t __num) + { return unique_ptr<_Tp>(new typename remove_extent<_Tp>::type[__num]()); } + + /// Disable std::make_unique for arrays of known bound + template + inline typename _MakeUniq<_Tp>::__invalid_type + make_unique(_Args&&...) = delete; +} +#endif + +#endif diff --git a/projects/ppocr/ppocr/utils/range_iterator.hpp b/projects/ppocr/ppocr/utils/range_iterator.hpp new file mode 100644 index 0000000000..f56d5f679f --- /dev/null +++ b/projects/ppocr/ppocr/utils/range_iterator.hpp @@ -0,0 +1,153 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
#ifndef PPOCR_SRC_UTILS_RANGE_ITERATOR_HPP
#define PPOCR_SRC_UTILS_RANGE_ITERATOR_HPP

#include <cstddef>
#include <iterator>
#include <type_traits>
#include <utility>


namespace ppocr {

namespace detail_ {
    /// Identity projection: hands back whatever it is given.
    struct DefaultProxy {
        DefaultProxy() noexcept {}
        template<class T> T const & operator()(T const & x) const { return x; }
        template<class T> T & operator()(T & x) const { return x; }
        template<class T> T operator()(T && x) const { return std::forward<T>(x); }
    };

    /// Class wrapper around a raw pointer, so that a pointer can be used as
    /// a base class by proxy_iterator.
    template<class T>
    struct pointer_iterator
    {
        using value_type = T;
        using pointer = T *;
        using reference = T &;
        using const_reference = T const &;
        // std::iterator_traits looks for `iterator_category` (and `pointer`);
        // without them the traits are empty and standard algorithms reject
        // the iterator. `iterator_tag` is kept for existing users.
        using iterator_category = std::random_access_iterator_tag;
        using iterator_tag = iterator_category;
        using difference_type = std::ptrdiff_t;

        pointer_iterator(T * p = nullptr) noexcept
        : p_(p)
        {}

        pointer_iterator & operator++() noexcept { ++p_; return *this; }
        pointer_iterator operator++(int) noexcept { return {p_++}; }
        pointer_iterator & operator--() noexcept { --p_; return *this; }
        pointer_iterator operator--(int) noexcept { return {p_--}; }

        T & operator*() noexcept { return *p_; }
        T const & operator*() const noexcept { return *p_; }

        T & operator[](std::size_t i) noexcept { return p_[i]; }
        T const & operator[](std::size_t i) const noexcept { return p_[i]; }

        pointer_iterator operator+(std::ptrdiff_t n) const noexcept { return {p_ + n}; }
        pointer_iterator operator-(std::ptrdiff_t n) const noexcept { return {p_ - n}; }
        pointer_iterator & operator+=(std::ptrdiff_t n) noexcept { p_ += n; return *this; }
        pointer_iterator & operator-=(std::ptrdiff_t n) noexcept { p_ -= n; return *this; }

        std::ptrdiff_t operator-(pointer_iterator const & other) const noexcept { return p_ - other.p_; }

        bool operator == (pointer_iterator const & other) const noexcept { return p_ == other.p_; }
        bool operator != (pointer_iterator const & other) const noexcept { return p_ != other.p_; }
        bool operator < (pointer_iterator const & other) const noexcept { return p_ < other.p_; }
        bool operator > (pointer_iterator const & other) const noexcept { return p_ > other.p_; }
        bool operator <= (pointer_iterator const & other) const noexcept { return p_ <= other.p_; }
        bool operator >= (pointer_iterator const & other) const noexcept { return p_ >= other.p_; }

    private:
        T * p_;
    };

    /// `Iterator` itself when it is a class type, pointer_iterator<T> when it
    /// is a raw `T*` (a raw pointer cannot be inherited from).
    template<class Iterator>
    using force_iterator_class = typename std::conditional<
        std::is_pointer<Iterator>::value,
        pointer_iterator<typename std::remove_pointer<Iterator>::type>,
        Iterator
    >::type;
}

/// Iterator adaptor: behaves like `IteratorBase` (increment, comparison, ...
/// are inherited) but every element access goes through `Proxy`.
///
/// NOTE(review): `operator->` takes the address of the proxy result, so it
/// presumably requires the proxy to return an lvalue - confirm for proxies
/// that return by value.
template<class IteratorBase, class Proxy = detail_::DefaultProxy>
struct proxy_iterator : detail_::force_iterator_class<IteratorBase>, private Proxy
{
    using iterator_base = detail_::force_iterator_class<IteratorBase>;

    proxy_iterator(IteratorBase base, Proxy proxy)
    : iterator_base(base)
    , Proxy(proxy)
    {}

    proxy_iterator(IteratorBase base)
    : iterator_base(base)
    {}

private:
    iterator_base & base_() { return static_cast<iterator_base &>(*this); }
    Proxy & proxy_() { return static_cast<Proxy &>(*this); }

public:
    auto operator*() -> decltype(this->proxy_()(*this->base_()))
    { return this->proxy_()(*this->base_()); }

    auto operator->() -> decltype(&this->proxy_()(*this->base_()))
    { return &this->proxy_()(*this->base_()); }

    auto operator[](std::size_t i) -> decltype(this->proxy_()(*(this->base_()+i)))
    { return this->proxy_()(*(this->base_()+i)); }
};

/// Non-owning view over `[first, last)` whose elements are seen through
/// `Proxy`. The iterators are stored by value; the viewed sequence is not
/// copied.
template<class Iterator, class Proxy = detail_::DefaultProxy>
class range_iterator : Proxy
{
    Iterator first_;
    Iterator last_;

    Proxy const & proxy_() const { return static_cast<Proxy const &>(*this); }

public:
    range_iterator(Iterator first, Iterator last)
    : first_(first)
    , last_(last)
    {}

    range_iterator(Iterator first, Iterator last, Proxy proxy)
    : Proxy(proxy)
    , first_(first)
    , last_(last)
    {}

    proxy_iterator<Iterator, Proxy> begin() const { return {this->first_, this->proxy_()}; }
    proxy_iterator<Iterator, Proxy> end() const { return {this->last_, this->proxy_()}; }

    /// Projected i-th element; no bounds check.
    auto operator[](std::size_t i) const -> decltype(this->proxy_()(*(this->first_+i)))
    { return this->proxy_()(*(this->first_+i)); }

    /// Projected first element; the range must not be empty.
    auto front() const -> decltype(this->proxy_()(*this->first_))
    { return this->proxy_()(*this->first_); }

    /// Projected last element; the range must not be empty.
    auto back() const -> decltype(this->proxy_()(*(this->last_-1)))
    { return this->proxy_()(*(this->last_-1)); }

    auto size() const -> decltype(this->last_ - this->first_)
    { return this->last_ - this->first_; }

    bool empty() const { return this->first_ == this->last_; }
};

}

#endif
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/read_definitions_file_and_normalize.hpp" + +#include +#include +#include +#include + +#include +#include + +namespace ppocr { + +std::vector read_definitions_file_and_normalize( + const char* filename, DataLoader& loader, std::ostream * out +) { + std::ifstream file(filename); + if (!file) { + throw std::runtime_error(strerror(errno)); + } + + std::vector definitions = read_definitions(file, loader); + + if (!file.eof()) { + throw std::runtime_error("read error"); + } + + if (out) { + *out << "definitions.size = " << definitions.size() << "\n\n"; + } + + std::sort( + definitions.begin(), definitions.end(), + [](Definition const & lhs, Definition const & rhs) { + return lhs.datas < rhs.datas; + } + ); + definitions.erase( + std::unique( + definitions.begin(), definitions.end(), + [](Definition const & lhs, Definition const & rhs) { + return lhs.datas == rhs.datas; + } + ), + definitions.end() + ); + + if (out) { + *out << "unique definitions.size = " << definitions.size() << "\n\n"; + } + + return definitions; +} + +} diff --git a/projects/ppocr/ppocr/utils/read_definitions_file_and_normalize.hpp b/projects/ppocr/ppocr/utils/read_definitions_file_and_normalize.hpp new file mode 100644 index 0000000000..6ed1cf23b9 --- /dev/null +++ b/projects/ppocr/ppocr/utils/read_definitions_file_and_normalize.hpp @@ -0,0 +1,36 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_SRC_UTILS_READ_DEFINITIONS_FILE_AND_NORMALIZE_HPP +#define PPOCR_SRC_UTILS_READ_DEFINITIONS_FILE_AND_NORMALIZE_HPP + +#include "factory/definition.hpp" + +#include +#include + +namespace ppocr { + +class DataLoader; + +std::vector +read_definitions_file_and_normalize(char const * file, DataLoader & loader, std::ostream * out = nullptr); + +} + +#endif diff --git a/projects/ppocr/ppocr/utils/read_file.hpp b/projects/ppocr/ppocr/utils/read_file.hpp new file mode 100644 index 0000000000..082c652cc4 --- /dev/null +++ b/projects/ppocr/ppocr/utils/read_file.hpp @@ -0,0 +1,74 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
#ifndef PPOCR_SRC_UTILS_READ_FILE_HPP
#define PPOCR_SRC_UTILS_READ_FILE_HPP

#include <cerrno>
#include <cstring>
#include <fstream>
#include <stdexcept>
#include <string>
#include <utility>

namespace ppocr { namespace utils {

/// Opens \a filename, hands the stream to \a reader and checks that the
/// reader consumed the file up to end-of-file.
///
/// \param filename  path of the file to read.
/// \param reader    callable invoked as `reader(std::istream&)`.
/// \throws std::runtime_error "<filename> : open error: ..." when the file
///         cannot be opened, "<filename> : read error: ..." when the stream
///         is not at end-of-file once \a reader returns.
template<class Fn>
void check_read_file(char const * filename, Fn reader) {
    // Start from a clean errno so a stale value left by unrelated code is
    // never reported as the cause of a failure here.
    errno = 0;
    std::ifstream file(filename);

    if (!file) {
        // Capture before building the message (allocation may touch errno).
        int const open_error = errno;
        throw std::runtime_error(
            std::string(filename) + " : open error: " + std::strerror(open_error));
    }

    errno = 0;
    reader(file);

    if (!file.eof()) {
        int const read_error = errno;
        throw std::runtime_error(
            std::string(filename) + " : read error: "
            + (read_error ? std::strerror(read_error) : "not eof"));
    }
}

/// Fills \a x from \a filename using `operator>>`.
template<class T>
void read_file(T & x, char const * filename) {
    check_read_file(filename, [&](std::istream & file) { file >> x; });
}

/// Default-constructs a T, fills it from \a filename using `operator>>` and
/// returns it.
template<class T>
T load_from_file(char const * filename) {
    T ret;
    check_read_file(filename, [&](std::istream & file) { file >> ret; });
    return ret;
}


// std::string conveniences: forward to the `char const *` overloads.

template<class Fn>
void check_read_file(std::string const & filename, Fn reader) {
    check_read_file(filename.c_str(), std::move(reader));
}

template<class T>
void read_file(T & x, std::string const & filename) {
    read_file(x, filename.c_str());
}

template<class T>
T load_from_file(std::string const & filename) {
    return load_from_file<T>(filename.c_str());
}

} }

#endif
#ifndef PPOCR_SRC_UTILS_REINDEX_HPP
#define PPOCR_SRC_UTILS_REINDEX_HPP

#include <cassert>
#include <utility>
#include <vector>

namespace ppocr {
namespace utils {

/// Reorders \a cont in place so that, afterwards, `cont[k]` holds the
/// element that was at `cont[indexes[k]]`.
///
/// Elements are moved, not copied. The result always has the size \a cont
/// had on entry; when \a indexes is shorter, the remaining slots are
/// value-initialized.
///
/// \pre `indexes.size() <= cont.size()` and every index is `< cont.size()`.
///      An index should appear at most once, otherwise an already
///      moved-from element is moved again.
/// \tparam T      element type; must be default constructible and movable.
/// \tparam Index  integer type of the indexes (deduced from \a indexes).
template<class T, class Index = unsigned>
void reindex(std::vector<Index> const & indexes, std::vector<T> & cont) {
    // Writing more entries than `cont` holds would run past the new buffer.
    assert(indexes.size() <= cont.size());

    std::vector<T> new_cont;
    new_cont.resize(cont.size());
    auto it = new_cont.begin();
    for (auto i : indexes) {
        assert(i < cont.size());
        *it = std::move(cont[i]);
        ++it;
    }
    cont = std::move(new_cont);
}

}
}

#endif
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "ppocr/unique_sort_definition.hpp" +#include "ppocr/utils/image_compare.hpp" + +#include +#include + +namespace ppocr { + +void unique_sort_definitions(std::vector< Definition >& defs) +{ + if (defs.empty()) { + return; + } + + std::sort(defs.begin(), defs.end(), [](Definition const & a, Definition const & b) { + { + int const cmp = a.c.compare(b.c); + if (cmp < 0) { + return true; + } + if (cmp > 0) { + return false; + } + } + + { + int const cmp = a.font_name.compare(b.font_name); + if (cmp < 0) { + return true; + } + if (cmp > 0) { + return false; + } + } + + return image_compare(a.img, b.img) < 0; + }); + + defs.erase(std::unique(defs.begin(), defs.end(), [](Definition const & a, Definition const & b) { + return a.c == b.c && a.font_name == b.font_name && a.img == b.img; + }), defs.end()); +} + +} diff --git a/projects/ppocr/ppocr/utils/unique_sort_definition.hpp b/projects/ppocr/ppocr/utils/unique_sort_definition.hpp new file mode 100644 index 0000000000..196e32c753 --- /dev/null +++ b/projects/ppocr/ppocr/utils/unique_sort_definition.hpp @@ -0,0 +1,32 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
+* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef PPOCR_UTILS_UNIQUE_SORT_DEFINITION_HPP +#define PPOCR_UTILS_UNIQUE_SORT_DEFINITION_HPP + +#include "factory/definition.hpp" + +#include + +namespace ppocr { + +void unique_sort_definitions(std::vector & defs); + +} + +#endif diff --git a/projects/ppocr/ppocr/utils/utf.hpp b/projects/ppocr/ppocr/utils/utf.hpp new file mode 100644 index 0000000000..951aae17ca --- /dev/null +++ b/projects/ppocr/ppocr/utils/utf.hpp @@ -0,0 +1,136 @@ +/* +* Copyright (C) 2016 Wallix +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the License, or (at your option) +* any later version. +* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +* details. 
#ifndef PPOCR_SRC_UTILS_UTF_HPP
#define PPOCR_SRC_UTILS_UTF_HPP

#include <cstdint>

namespace ppocr { namespace utf {

    namespace detail_ {
        /// Number of continuation bytes announced by a UTF-8 lead byte, or
        /// -1 when \a lead is itself a continuation byte (0x80..0xBF).
        inline int utf8_trail_count(uint8_t lead) noexcept
        {
            switch (lead >> 4) {
                case 0xC: case 0xD: return 1;
                case 0xE: return 2;
                case 0xF: return 3;
                case 0x8: case 0x9: case 0xA: case 0xB: return -1;
                default: return 0;
            }
        }

        /// True when the \a count bytes at \a p are all continuation bytes
        /// (10xxxxxx). Stops at the first mismatch, so a NUL terminator is
        /// never stepped over.
        inline bool utf8_has_trail(const uint8_t * p, int count) noexcept
        {
            for (int i = 0; i < count; ++i) {
                if ((p[i] & 0xC0) != 0x80) {
                    return false;
                }
            }
            return true;
        }
    }

// TODO redemption classe
    /// Walks a NUL-terminated UTF-8 string and exposes each character as its
    /// Unicode code point. The first character is decoded by the
    /// constructor; a value of 0 marks the end of the string or a malformed
    /// sequence.
    class UTF8toUnicodeIterator
    {
        const uint8_t * source;
        uint32_t ucode = 0;

    public:
        UTF8toUnicodeIterator(const uint8_t * str)
        : source(str)
        { ++*this; }

        UTF8toUnicodeIterator(const char * str)
        : UTF8toUnicodeIterator(reinterpret_cast<const uint8_t *>(str))
        {}

        /// Decodes the next character and moves pos() just past it.
        UTF8toUnicodeIterator & operator++()
        {
            uint32_t const lead = *source;
            ++source;

            int const trail = detail_::utf8_trail_count(uint8_t(lead));
            // Stray continuation byte, or a sequence cut short (typically by
            // the terminator): report 0 and do not consume the bytes that
            // are not part of the sequence.
            if (trail < 0 || !detail_::utf8_has_trail(source, trail)) {
                this->ucode = 0;
                return *this;
            }

            switch (trail) {
                /* U+0000..U+007F : single byte */
                case 0:
                    this->ucode = lead;
                    break;
                /* U+0080..U+07FF : 2 bytes sequences */
                case 1:
                    this->ucode = ((lead & 0x1F) << 6)|(source[0] & 0x3F);
                    break;
                /* U+0800..U+FFFF : 3 bytes sequences */
                case 2:
                    this->ucode = ((lead & 0x0F) << 12)|((source[0] & 0x3F) << 6)|(source[1] & 0x3F);
                    break;
                /* U+10000 and above : 4 bytes sequences */
                default:
                    this->ucode = ((lead & 0x07) << 18)|((source[0] & 0x3F) << 12)|((source[1] & 0x3F) << 6)|(source[2] & 0x3F);
                    break;
            }
            source += trail;
            return *this;
        }

        uint32_t operator*() const
        { return this->ucode; }

        uint32_t code() const
        { return this->ucode; }

        /// Address of the first byte not decoded yet.
        uint8_t const * pos() const
        { return this->source; }
    };

    /// Same traversal as UTF8toUnicodeIterator, but the value exposed for a
    /// character is its raw UTF-8 bytes packed big-endian into a uint32_t
    /// (e.g. C3 A9 gives 0xC3A9) instead of the decoded code point.
    class UTF8Iterator
    {
        const uint8_t * source;
        uint32_t ucode = 0;

    public:
        UTF8Iterator(const uint8_t * str)
        : source(str)
        { ++*this; }

        UTF8Iterator(const char * str)
        : UTF8Iterator(reinterpret_cast<const uint8_t *>(str))
        {}

        /// Packs the next character and moves pos() just past it.
        UTF8Iterator & operator++()
        {
            uint32_t const lead = *source;
            ++source;

            int const trail = detail_::utf8_trail_count(uint8_t(lead));
            // Same malformed-input policy as UTF8toUnicodeIterator.
            if (trail < 0 || !detail_::utf8_has_trail(source, trail)) {
                this->ucode = 0;
                return *this;
            }

            // Append each continuation byte below the lead byte.
            uint32_t packed = lead;
            for (int i = 0; i < trail; ++i) {
                packed = (packed << 8) | source[i];
            }
            this->ucode = packed;
            source += trail;
            return *this;
        }

        uint32_t operator*() const
        { return this->ucode; }

        uint32_t code() const
        { return this->ucode; }

        /// Address of the first byte not consumed yet.
        uint8_t const * pos() const
        { return this->source; }
    };
} }

#endif
this->is_line_bars( - // input, tcolor, - // this->context.col_first_text, this->context.col_last_text, this->context.row_last_text-1)) - //{ - // --this->context.row_last_text; - //} - // - //while (this->context.col_first_text < this->context.col_last_text) { - // unsigned y = this->context.row_first_text; - // for (; y < this->context.row_last_text; ++y) { - // if (tcolor.threshold_chars(input(y, this->context.col_first_text))) { - // break; - // } - // } - // if (y != this->context.row_last_text) { - // break; - // } - // ++this->context.col_first_text; - //} } - - //template - //static bool is_line_bars( - // ImageView const & input, titlebar_color const & tcolor, - // unsigned x, unsigned xmax, unsigned y) - //{ - // for (; x < xmax; ++x) { - // if (tcolor.threshold_chars(input(y, x))) { - // return false; - // } - // } - // return true; - //} - -// template -// mln::point2d propagate_old( -// ImageView const & input, titlebar_color const & tcolor, -// unsigned x, unsigned y, unsigned x_max, unsigned y_max) -// { -// unsigned ih = y + 1; -// unsigned iw = x + 1; -// -// while (ih < y_max && !this->deja_vu[ih * input.width() + iw] && tcolor.is_color_bar(input(ih, iw))) { -// this->deja_vu[ih * input.width() + iw] = true; -// ++ih; -// } -// -// const unsigned hbarre = ih - (y + 1); -// unsigned wbarre = 0; -// -// if (hbarre >= this->context.bbox_min_height && hbarre <= this->context.bbox_max_height + ocr::bbox_treshold) { -// ih = y; -// -// while (iw < x_max && !this->deja_vu[ih * input.width() + iw] && tcolor.is_color_bar(input(ih, iw))) { -// while (ih < y_max && !this->deja_vu[ih * input.width() + iw] -// && tcolor.is_color_bar(input(ih, iw))) { -// ++ih; -// } -// ih = y; -// ++iw; -// ++wbarre; -// } -// -// std::vector::iterator it = this->deja_vu.begin() + y * input.width() + x; -// for (unsigned i = 0; i < hbarre; ++i) { -// std::fill(it, it + wbarre, true); -// it += input.width(); -// } -// -// typedef std::vector::iterator iterator; -// iterator 
first = this->deja_vu.begin() + y * input.width() + x; -// iterator last = first + input.width() * hbarre; -// for (; first != last; first += input.width()) { -// std::fill(first, first + wbarre, true); -// } -// this->rect_deja_vu.push_back(mln::box2d(mln::point2d(y, x), mln::point2d(y + hbarre, x + wbarre))); -// -// y += hbarre; -// x += wbarre; -// } -// -// return mln::point2d(y, x); -// } }; template diff --git a/src/capture/ocr/fonts.hpp b/src/capture/ocr/fonts.hpp index 35aa1c70f0..c74a3f5f33 100644 --- a/src/capture/ocr/fonts.hpp +++ b/src/capture/ocr/fonts.hpp @@ -73,28 +73,28 @@ namespace fonts { # include "ocr1/latin_classifier.hxx" # include "ocr1/cyrillic_classifier.hxx" - static constexpr Font latin_fonts[] = { + inline constexpr Font latin_fonts[] = { # include "ocr1/common_classifier.names.hxx" # include "ocr1/latin_classifier.names.hxx" }; - static constexpr Font cyrillic_fonts[] = { + inline constexpr Font cyrillic_fonts[] = { # include "ocr1/common_classifier.names.hxx" # include "ocr1/cyrillic_classifier.names.hxx" }; - static constexpr Font const * fonts[] = {latin_fonts, cyrillic_fonts}; + inline constexpr Font const * fonts[] = {latin_fonts, cyrillic_fonts}; using LocaleId = locale::LocaleId; - static constexpr unsigned nfonts[] = { + inline constexpr unsigned nfonts[] = { sizeof(latin_fonts)/sizeof(latin_fonts[0]) , sizeof(cyrillic_fonts)/sizeof(cyrillic_fonts[0]) }; namespace internal { template - /*constexpr*/ inline unsigned min_height(Font const (& fonts)[N]) noexcept + constexpr inline unsigned min_height(Font const (& fonts)[N]) noexcept { unsigned ret = fonts[0].min_height_char; for (unsigned i = 1; i < N; ++i) { @@ -106,7 +106,7 @@ namespace fonts { } template - /*constexpr*/ inline unsigned max_height(Font const (& fonts)[N]) noexcept + constexpr inline unsigned max_height(Font const (& fonts)[N]) noexcept { unsigned ret = 0; for (unsigned i = 0; i < N; ++i) { @@ -118,11 +118,11 @@ namespace fonts { } } // namespace internal - 
static const/*expr*/ unsigned min_height_font[] = { + inline constexpr unsigned min_height_font[] = { internal::min_height(latin_fonts) , internal::min_height(cyrillic_fonts) }; - static const/*expr*/ unsigned max_height_font[] = { + inline constexpr unsigned max_height_font[] = { internal::max_height(latin_fonts) , internal::max_height(cyrillic_fonts) }; diff --git a/src/capture/ocr/labelize.hh b/src/capture/ocr/labelize.hh index 9f12d7cbaf..aef5290418 100644 --- a/src/capture/ocr/labelize.hh +++ b/src/capture/ocr/labelize.hh @@ -125,13 +125,4 @@ void labelize(std::vector & attributes, const ::mln::image2d } } -inline -std::vector labelize(const ::mln::image2d& input) -{ - std::vector attributes; - labelize(attributes, input); - return attributes; -} - - } //namespace ocr diff --git a/src/capture/ocr/main/display_learning.cc b/src/capture/ocr/main/display_learning.cc index 47f4a6e926..0ef2e09738 100644 --- a/src/capture/ocr/main/display_learning.cc +++ b/src/capture/ocr/main/display_learning.cc @@ -29,6 +29,8 @@ int main(int argc, char** argv) } return 2; } - display_char_box(std::cout, input, ocr::labelize(input)); + std::vector attributes; + ocr::labelize(attributes, input); + display_char_box(std::cout, input, attributes); return 0; }