From b751b25cae2f61dbd763746c9803801fcee2cf72 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Sun, 6 Jun 2021 11:55:17 +0800 Subject: [PATCH 1/2] fix data import tools --- jamdict/tools.py | 27 +++++++++++++-------------- jamdict/util.py | 25 +++++++++++++++++++++---- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/jamdict/tools.py b/jamdict/tools.py index cec5b03..29ac84a 100755 --- a/jamdict/tools.py +++ b/jamdict/tools.py @@ -67,22 +67,21 @@ def import_data(cli, args): '''Generate Jamdict SQLite database from XML data files''' rp = TextReport() t = Timer(report=rp) - db_loc = os.path.abspath(os.path.expanduser(args.jdb)) show_info(cli, args) jam = get_jam(cli, args) - if args and (args.jdb or args.kd2): - if os.path.isfile(db_loc): - if not confirm("Database file exists. Do you want to overwite (This action cannot be undone! yes/no?) "): - cli.logger.warning("Program aborted.") - exit() - else: - os.unlink(db_loc) - # perform input - t.start("Creating Jamdict SQLite database. This process may take very long time ...") - jam.import_data() - t.stop() - else: - print("Database paths were not provided. Process aborted.") + if not jam.db_file: + print("Database path is not available") + elif os.path.isfile(jam.db_file): + if not confirm("Database file exists. Do you want to overwite (This action cannot be undone! yes/no?) "): + cli.logger.warning("Program aborted.") + exit() + else: + os.unlink(jam.db_file) + # perform input + print(f"Importing data to: {jam.db_file}") + t.start("Creating Jamdict SQLite database. This process may take very long time ...") + jam.import_data() + t.stop() def dump_result(results, report=None): diff --git a/jamdict/util.py b/jamdict/util.py index d1c2ac1..12ce147 100644 --- a/jamdict/util.py +++ b/jamdict/util.py @@ -12,6 +12,7 @@ import logging import threading import warnings +from pathlib import Path from collections import defaultdict as dd from collections import OrderedDict from typing import List, Sequence @@ -273,6 +274,13 @@ def __init__(self, db_file=None, kd2_file=None, self.jmd_xml_file = jmd_xml_file if jmd_xml_file else config.get_file('JMDICT_XML') if auto_config else None self.kd2_xml_file = kd2_xml_file if kd2_xml_file else config.get_file('KD2_XML') if auto_config else None self.jmnedict_xml_file = jmnedict_xml_file if jmnedict_xml_file else config.get_file('JMNEDICT_XML') if auto_config else None + if auto_expand: + if self.jmd_xml_file: + self.jmd_xml_file = os.path.expanduser(self.jmd_xml_file) + if self.kd2_xml_file: + self.kd2_xml_file = os.path.expanduser(self.kd2_xml_file) + if self.jmnedict_xml_file: + self.jmnedict_xml_file = os.path.expanduser(self.jmnedict_xml_file) self.db_file = db_file if db_file else config.get_file('JAMDICT_DB') if auto_config else None if not self.db_file or (self.db_file != ':memory:' and not os.path.isfile(self.db_file)): @@ -475,31 +483,40 @@ def is_available(self) -> bool: def import_data(self): """ Import JMDict and KanjiDic2 data from XML to SQLite """ + if self.db_file and not os.path.exists(self.db_file): + Path(self.db_file).touch() ctx = self.__make_db_ctx() ctx.buckmode() + ctx.auto_commit = False if self.jmdict and self.jmdict_xml: getLogger().info("Importing JMDict data") self.jmdict.insert_entries(self.jmdict_xml, ctx=ctx) # import KanjiDic2 - if self.kd2 is not None and self.kd2_xml and os.path.isfile(self.kd2_xml_file): + if self.kd2_xml is not None and os.path.isfile(self.kd2_xml_file): getLogger().info("Importing KanjiDic2 data") if self.jmdict is not None and self.kd2_file == self.db_file: self.jmdict.insert_chars(self.kd2_xml, ctx=ctx) - else: + elif self.kd2 is not None: getLogger().warning(f"Building Kanjidic2 DB using a different DB context {self.kd2_file} vs {self.db_file}") with self.kd2.ctx() as kd_ctx: self.kd2.insert_chars(self.kd2_xml, ctx=kd_ctx) + else: + getLogger().warning(f"Kanjidic2 DB path could not be found") else: + print(f"kd2_xml: {self.kd2_xml}") + print(f"kd2_xml_file: {self.kd2_xml_file}") getLogger().warning("KanjiDic2 XML data is not available - skipped!") # import JMNEdict - if self.jmnedict is not None and self.jmne_xml and os.path.isfile(self.jmnedict_xml_file): + if self.jmne_xml is not None and os.path.isfile(self.jmnedict_xml_file): getLogger().info("Importing JMNEdict data") if self.jmdict is not None and self.jmnedict_file == self.db_file: self.jmnedict.insert_name_entities(self.jmne_xml, ctx=ctx) - else: + elif self.jmnedict is not None: getLogger().warning(f"Building Kanjidic2 DB using a different DB context {self.jmne_file} vs {self.db_file}") with self.jmnedict.ctx() as ne_ctx: self.jmnedict.insert_name_entities(self.jmne_xml, ctx=ne_ctx) + else: + getLogger().warning(f"JMNE DB path could not be found") else: getLogger().warning("JMNEdict XML data is not available - skipped!") _buckmode_off = getattr(ctx, "buckmode_off", None) From 9eba8156d6c8f71952516d2598583e803aa7dcd8 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Sun, 6 Jun 2021 12:02:30 +0800 Subject: [PATCH 2/2] pump version to 0.1a11.post2 --- jamdict/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jamdict/__version__.py b/jamdict/__version__.py index 0f67c23..ada2c9d 100644 --- a/jamdict/__version__.py +++ b/jamdict/__version__.py @@ -13,7 +13,7 @@ # Version configuration (enforcing PEP 440) # ------------------------------------------------------------------------------ __status__ = "3 - Alpha" -__version_tuple__ = (0, 1, 0, 11, 1) +__version_tuple__ = (0, 1, 0, 11, 2) __version_status__ = '' # a specific value ('rc', 'dev', etc.) or leave blank to be auto-filled # ------------------------------------------------------------------------------ __status_map__ = {'3 - Alpha': 'a', '4 - Beta': 'b', '5 - Production/Stable': '', '6 - Mature': ''}