From 5dcd072e36c1de5afb7bf49b7babebc7183ad2ed Mon Sep 17 00:00:00 2001 From: nobu-g Date: Tue, 14 Mar 2023 00:48:37 +0900 Subject: [PATCH] update meta files --- CHANGELOG.md | 17 ++++++++++++++++- CITATION.cff | 2 +- README.md | 17 +++++++++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50ef0c82..9a527c40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.0.0] - 2023-03-14 + +### Added +- Introduce the seq2seq module for more accurate reading prediction and canonicalization. +- Introduce learning-based sentence splitter. + +### Changed +- Replace RoBERTa-based models with DeBERTaV2-based models. +- Support CUDA 11.7 by default instead of CUDA 10.2. + +### Fixed +- Fix many minor bugs. + ## [1.4.2] - 2023-02-22 ### Fixed @@ -106,7 +119,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - Remove an unnecessary dependency, `fugashi`. 
-[Unreleased]: https://github.com/ku-nlp/kwja/compare/v1.4.1...HEAD +[Unreleased]: https://github.com/ku-nlp/kwja/compare/v2.0.0...HEAD +[2.0.0]: https://github.com/ku-nlp/kwja/compare/v1.4.2...v2.0.0 +[1.4.2]: https://github.com/ku-nlp/kwja/compare/v1.4.1...v1.4.2 [1.4.1]: https://github.com/ku-nlp/kwja/compare/v1.4.0...v1.4.1 [1.4.0]: https://github.com/ku-nlp/kwja/compare/v1.3.0...v1.4.0 [1.3.0]: https://github.com/ku-nlp/kwja/compare/v1.2.2...v1.3.0 diff --git a/CITATION.cff b/CITATION.cff index 9a461973..9be7c035 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -16,6 +16,6 @@ authors: given-names: Daisuke - family-names: Kurohashi given-names: Sadao -version: 1.0.0 +version: 2.0.0 repository-code: "https://github.com/ku-nlp/kwja" date-released: 2022-09-28 diff --git a/README.md b/README.md index da5b48b8..bd571b63 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ KWJA performs many language analysis tasks, including: ## Requirements -- Python: 3.8+ +- Python: 3.8, 3.9, 3.10 - Dependencies: See [pyproject.toml](./pyproject.toml). ## Getting Started @@ -78,13 +78,16 @@ Here are some other options for `kwja` command: `--typo-batch-size`: Batch size for typo module. +`--seq2seq-batch-size`: Batch size for seq2seq module. + `--char-batch-size`: Batch size for char module. `--word-batch-size`: Batch size for word module. `--tasks`: Tasks to be performed. Please specify 'typo', 'char', 'typo,char', 'char,word', 'typo,char,word', 'char,word,word_discourse' or 'typo,char,word,word_discourse'. - `typo`: Typo correction - - `char`: Tokenization and Word normalization + - `seq2seq`: Word segmentation, Word normalization, Reading prediction, Lemmatization, and Canonicalization 
+ - `char`: Word segmentation and Word normalization - `word`: Morphological analysis, Named entity recognition, Word feature tagging, Dependency parsing, PAS analysis, Bridging reference resolution, and Coreference resolution - `word_discourse`: Discourse relation analysis - If you need the results of discourse relation analysis, please specify this in addition to `word`. @@ -136,6 +139,16 @@ analyzed_document = kwja.apply( } ``` +```bibtex +@InProceedings{児玉2023, + author = {児玉 貴志 and 植田 暢大 and 大村 和正 and 清丸 寛一 and 村脇 有吾 and 河原 大輔 and 黒橋 禎夫}, + title = {テキスト生成モデルによる日本語形態素解析}, + booktitle = {言語処理学会 第29回年次大会}, + year = {2023}, + address = {沖縄}, +} +``` + ## Reference - [KNP format](http://cr.fvcrc.i.nagoya-u.ac.jp/~sasano/knp/format.html)