diff --git a/.github/workflows/deno.yml b/.github/workflows/deno.yml index e26630f..5862004 100644 --- a/.github/workflows/deno.yml +++ b/.github/workflows/deno.yml @@ -35,7 +35,7 @@ jobs: - name: Upload artifact uses: actions/upload-pages-artifact@v3 with: - path: "." + path: "/dist/" - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index a9b203a..e31aa26 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ -main.js +.vscode/ +dictionary/dictionary.ts +dist/main.js +telo-misikeke/rules.js +telo-misikeke/Parser.js diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 34343fc..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "cSpell.customDictionaries": { - "nimi-ku-suli": { - "name": "ku-suli", - "path": "${workspaceRoot}/nimi-ku-suli.txt", - "description": "nimi ku sili", - "addWords": true - } - }, - "deno.enable": true -} diff --git a/CHANGELOG.md b/CHANGELOG.md index c5f3e60..3a4c8b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,42 @@ # Changelog -You may need to force restart the page in order to use the latest version: shift + click the restart button; or ctrl + shift + R. + + +## 0.3.0 + +This is a huge update now with better quality translations, configurable settings, UCSUR support, and expanded vocabulary! + +- Reimplement the word "a". This was dropped due to parser rewrite. +- The vocabulary has been expanded to _nimi ku suli_ plus _nimi su!_. +- New "dictionary mode", just enter a single word and ilo Token will output all definitions from its own dictionary. This also works for particles. To bypass this and translate the word as if it is the whole sentence, just add a period. +- Reimplement the "a" particle. +- Implement UCSUR support! 
It supports: + - Cartouche with nasin sitelen kalama + - Combined glyphs + - Long glyphs + - (Deprecated characters and combiners are not supported) +- Implement [nasin nanpa pona](https://sona.pona.la/wiki/nasin_nanpa_pona). +- Implement settings dialog. [More info](https://github.com/ilo-token/ilo-token.github.io/wiki/Settings-Help). +- Changes in error messages: + - All possible errors will now be listed. + - ilo Token now uses telo misikeke for error messages. This can be disabled from the settings. +- Multiline text will no longer be recognized. +- Add icons. + +You may not notice this, we take good grammar for granted, but ilo Token now has generally better quality translations thanks to the following: + +- It is now aware determiners are separate from adjectives. So you won't see adjectives like "nicely my", since adverbs can't modify determiners. +- It tries to ensure adjectives are in proper order. Yes this matters, it's "big red fruit" and not "red big fruit". +- Just like adjectives, determiners are also ordered, but unlike adjectives, they're also filtered (some combinations are not shown). You won't see "my your animal". +- It is aware of grammatical numbers. So you won't see "2 stick" or "1 sticks". + +Inside update (intended for developers): + +- Implement lexer and english AST. +- Overhaul dictionary: It is now a separate file with nicer syntax as opposed to written inside the code. 
## 0.2.2 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..f6ad4a6 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, 
+or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +neverrare@proton.me. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. 
This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 75f7eb2..8b49af6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,56 @@ # Contributing -You can do the usual github stuff: Open issue if there's an issue or you have a suggestion; Open pull request if you want to propose changes. 
If you want to propose a large change however, please open an issue first (or comment on an already existing issue page), and wait for my signal before beginning to work. +Thank you so much for considering contributing! Here is some useful information about the pages to get you started. Remember you can always ask for help in the [discussion](#Discussion) or privately through my email: [neverrare@proton.me](mailto:neverrare@proton.me) -## The wiki +## [Wiki](https://github.com/ilo-token/ilo-token.github.io/wiki) -The wiki provides useful information for contributors, although it's not complete yet. Check it out: [Visit wiki](https://github.com/neverRare/ilo-token/wiki). +The wiki is a place full of useful information for contributors from how the code works to guidelines for editing the dictionary! + +It's not perfect however. If you spot a mistake, please open an issue. + +## [Issue](https://github.com/ilo-token/ilo-token.github.io/issues) + +The issue page is intended for tracking the development of ilo Token as well as its wiki. You may open an issue for: + +- Bug report +- Feature request +- Fix suggestion to the wiki + +Please remember to search first before opening an issue, it might already exist! Duplicate issues are unnecessary. + +## [Discussion](https://github.com/ilo-token/ilo-token.github.io/discussions) + +This GitHub repository has a discussion section, a dedicated forum page that serves as a public space for ilo Token development. You may open a new page for: + +- Suggestion for translation e.g. how it can be improved +- Questions +- Any opinions or suggestions you want to share + +Please search first before opening a new page! Duplicate pages are unnecessary. + +## [Pull request](https://github.com/ilo-token/ilo-token.github.io/pulls) + +Before forking and editing, please claim an issue first or open an issue then claim it. After that, you can start right away. This is necessary to avoid wasted duplicate efforts. 
+ +You **don't** have to do this if: + +- It is a contribution to the dictionary +- It is a minor edit e.g. a typo fix + +[The wiki](https://github.com/ilo-token/ilo-token.github.io/wiki) contains useful information for contributors. + +As a thank you for contributing, you'll get a shout out in the changelog! + +## Alternative contact options + +If you have no GitHub account or want to provide feedback privately, these are alternative contact options: + +- [Google forms](https://docs.google.com/forms/d/e/1FAIpQLSfdDEMbde9mieybZdbZr8haRzNzGsg0BVkuTIzuHaATCdcrlw/viewform?usp=sf_link) +- Email: [neverrare@proton.me](mailto:neverrare@proton.me) + +ilo Token has a dedicated space in each of the following Discord servers, although these servers themselves serve as spaces for broader topics, not just ilo Token. + +- [ma pona pi toki pona](https://discord.gg/Byqn5z9) ([Thread for ilo Token](https://discord.com/channels/301377942062366741/1053538532993548320)): A Discord server for Toki Pona. +- [r/ProgrammingLanguages](https://discord.gg/4Kjt3ZE) ([Channel for ilo Token](https://discord.com/channels/530598289813536771/1224854915214737522)): A Discord server for programming language development. While ilo Token isn't a programming language, it uses similar techniques found in programming language development e.g. parsing. + +These are unofficial spaces and are not subject to the [Contributor Covenant Code of Conduct](https://github.com/ilo-token/ilo-token.github.io/blob/master/CODE_OF_CONDUCT.md). Instead, each has its own rules and different moderators. diff --git a/README.md b/README.md index bc6a685..33f6822 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,20 @@ # ilo Token -> The repository and the website uses to old name, it will be renamed on the next update. - A rule-based Toki Pona to English translator that translates into multiple sentences. This emphasizes how broad Toki Pona can be. Everything is hardcoded, no machine learning involved. 
-[Try it](https://neverrare.github.io/ilo-token/) +[Try it](https://ilo-token.github.io/) -## Building +**It is a work in progress!** [We welcome contributors, however!](./CONTRIBUTING.md) -This builds `./main.js` as a minified file ready for production use. +## Dependencies -You'll need [Deno](https://deno.com/). Run the following command. +You'll need the following in order to run commands: + +- [Deno](https://deno.com/) + +## Building + +This fetches the additional dependencies needed, builds the dictionary, and builds `./dist/main.js` as a minified file ready for production use. If you made changes to the dictionary, you'll need to run this again. ``` deno task build @@ -18,9 +22,9 @@ deno task build ## Watching -This builds `./main.js` as a non-minified file with source mapping, intended for testing and debugging. This command also watches the source codes in `./src/` path and rebuilds `./main.js` whenever there are changes. +Before running this command, you'll need to run `deno task build` first. This is because `deno task watch` neither fetches dependencies nor builds the dictionary. You'll only need to run `deno task build` once. -You'll need [Deno](https://deno.com/). Run the following command. +This builds `./dist/main.js` as a non-minified file with source mapping, intended for testing and debugging. This command also watches the source code in the `./src/` path and rebuilds `./dist/main.js` whenever there are changes. ``` deno task watch @@ -28,6 +32,8 @@ deno task watch To stop this command, simply press Ctrl + C. -## About the source codes +## Running locally + +After building or watching, you can directly open `./dist/index.js` in your favorite browser, with one caveat: UCSUR characters will display as tofu. -With exception to `./src/main.ts`, every source codes in `./src/` are environment agnostic. Meaning it can be run on Deno as well. This makes it convenient to directly test codes by using `deno run`. 
+This could be mitigated by making use of local server but I didn't do that, there's little need for that. diff --git a/bundle.ts b/bundle.ts index 830ee6b..bf80a28 100644 --- a/bundle.ts +++ b/bundle.ts @@ -1,29 +1,84 @@ -import { emit } from "./dev-deps.ts"; -import { debounce } from "./dev-deps.ts"; +import { bundle } from "@deno/emit"; +import { buildTeloMisikeke } from "telo-misikeke/build.ts"; +import { buildDictionary } from "dictionary/build.ts"; -const SOURCE = "./src/main.ts"; -const DESTINATION = "./main.js"; +const SOURCE = new URL("./src/main.ts", import.meta.url); +const DESTINATION = new URL("./dist/main.js", import.meta.url); +const IMPORT_MAP = new URL("./deno.json", import.meta.url); -const url = new URL(SOURCE, import.meta.url); - -async function build(options: emit.BundleOptions): Promise { - const result = await emit.bundle(url, options); - const { code } = result; - await Deno.writeTextFile(DESTINATION, code); -} -if (Deno.args[0] === "build") { - await build({ minify: true }); -} else if (Deno.args[0] === "watch") { - const builder = debounce.debounce(async () => { - console.log("Starting to build..."); - await build({ compilerOptions: { inlineSourceMap: true } }); +switch (Deno.args[0]) { + case "build": { + console.log("Building telo misikeke..."); + await buildTeloMisikeke(); + console.log("Building dictionary..."); + if (!await buildDictionary()) { + break; + } + console.log("Building main.js..."); + const bundled = await bundle(SOURCE, { + type: "classic", + importMap: IMPORT_MAP, + }); + const useStrict = addUseStrict(bundled.code); + const { stop, transform } = await import("esbuild"); + const minified = await transform(useStrict, { minify: true }); + await stop(); + await Deno.writeTextFile(DESTINATION, minified.code); console.log("Building done!"); - }, 500); - const watcher = Deno.watchFs("./src/"); - builder(); - for await (const _ of watcher) { - builder(); + break; + } + case "watch": { + const builder = debounce(async () => { + 
console.log("Starting to build..."); + try { + const { code } = await bundle(SOURCE, { + compilerOptions: { inlineSourceMap: true }, + type: "classic", + importMap: IMPORT_MAP, + }); + const useStrict = addUseStrict(code); + await Deno.writeTextFile(DESTINATION, useStrict); + console.log("Building done!"); + } catch (error) { + console.error(error); + } + }, 500); + const watcher = Deno.watchFs([ + "./src/", + "./telo-misikeke/", + "./dictionary/", + ]); + try { + builder(); + for await (const _ of watcher) { + builder(); + } + } finally { + watcher.close(); + } + throw new Error("unreachable"); } -} else { - throw new Error(`Unrecognized build option, ${Deno.args[0]}`); + default: + throw new Error(`Unrecognized build option, ${Deno.args[0]}`); +} +function addUseStrict(src: string): string { + return src.replace(/\(\s*function\s*\(\s*\)\s*\{/, '$&"use strict";'); +} +function debounce(callback: () => Promise, delay: number): () => void { + let previous = { aborted: true }; + let current = Promise.resolve(); + return () => { + previous.aborted = true; + const newPrevious = { aborted: false }; + setTimeout(() => { + if (!newPrevious.aborted) { + current = current + .then(() => callback()) + .catch((error) => { + throw error; + }); + } + }, delay); + previous = newPrevious; + }; } diff --git a/deno.json b/deno.json index 3d20008..6e6f669 100644 --- a/deno.json +++ b/deno.json @@ -1,7 +1,5 @@ { - "lock": false, "compilerOptions": { - "target": "esnext", "lib": ["dom", "dom.iterable", "dom.asynciterable", "deno.ns"] }, "tasks": { @@ -9,9 +7,16 @@ "watch": "deno run --allow-read --allow-write --allow-env --allow-net ./bundle.ts watch" }, "fmt": { - "include": ["./src/**/*.ts", "./bundle.ts", "./test-parser.ts"] + "include": ["./**/*.ts", "./**/*.js"] }, "lint": { - "include": ["./src/**/*.ts", "./bundle.ts", "./test-parser.ts"] + "include": ["./**/*.ts", "./**/*.js"] + }, + "imports": { + "compromise": "npm:compromise@14.14.0", + "dictionary/": "./dictionary/", + 
"@deno/emit": "jsr:@deno/emit@0.44.0", + "esbuild": "https://deno.land/x/esbuild@v0.23.0/wasm.js", + "telo-misikeke/": "./telo-misikeke/" } } diff --git a/deno.lock b/deno.lock new file mode 100644 index 0000000..da648e6 --- /dev/null +++ b/deno.lock @@ -0,0 +1,102 @@ +{ + "version": "3", + "packages": { + "specifiers": { + "jsr:@deno/cache-dir@0.8": "jsr:@deno/cache-dir@0.8.0", + "jsr:@deno/emit@0.44.0": "jsr:@deno/emit@0.44.0", + "jsr:@std/assert@^0.218.2": "jsr:@std/assert@0.218.2", + "jsr:@std/assert@^0.223.0": "jsr:@std/assert@0.223.0", + "jsr:@std/bytes@^0.218.2": "jsr:@std/bytes@0.218.2", + "jsr:@std/fmt@^0.218.2": "jsr:@std/fmt@0.218.2", + "jsr:@std/fs@^0.218.2": "jsr:@std/fs@0.218.2", + "jsr:@std/io@^0.218.2": "jsr:@std/io@0.218.2", + "jsr:@std/path@^0.218.2": "jsr:@std/path@0.218.2", + "jsr:@std/path@^0.223.0": "jsr:@std/path@0.223.0", + "npm:compromise@14.14.0": "npm:compromise@14.14.0" + }, + "jsr": { + "@deno/cache-dir@0.8.0": { + "integrity": "e87e80a404958f6350d903e6238b72afb92468378b0b32111f7a1e4916ac7fe7", + "dependencies": [ + "jsr:@std/fmt@^0.218.2", + "jsr:@std/fs@^0.218.2", + "jsr:@std/io@^0.218.2", + "jsr:@std/path@^0.218.2" + ] + }, + "@deno/emit@0.44.0": { + "integrity": "0750d6b39780a2975915a8a0a513b817338c21749022545a713c49af52e21961", + "dependencies": [ + "jsr:@deno/cache-dir@0.8", + "jsr:@std/path@^0.223.0" + ] + }, + "@std/assert@0.218.2": { + "integrity": "7f0a5a1a8cf86607cd6c2c030584096e1ffad27fc9271429a8cb48cfbdee5eaf" + }, + "@std/assert@0.223.0": { + "integrity": "eb8d6d879d76e1cc431205bd346ed4d88dc051c6366365b1af47034b0670be24" + }, + "@std/bytes@0.218.2": { + "integrity": "91fe54b232dcca73856b79a817247f4a651dbb60d51baafafb6408c137241670" + }, + "@std/fmt@0.218.2": { + "integrity": "99526449d2505aa758b6cbef81e7dd471d8b28ec0dcb1491d122b284c548788a" + }, + "@std/fs@0.218.2": { + "integrity": "dd9431453f7282e8c577cc22c9e6d036055a9a980b5549f887d6012969fabcca" + }, + "@std/io@0.218.2": { + "integrity": 
"c64fbfa087b7c9d4d386c5672f291f607d88cb7d44fc299c20c713e345f2785f", + "dependencies": [ + "jsr:@std/assert@^0.218.2", + "jsr:@std/bytes@^0.218.2" + ] + }, + "@std/path@0.218.2": { + "integrity": "b568fd923d9e53ad76d17c513e7310bda8e755a3e825e6289a0ce536404e2662", + "dependencies": [ + "jsr:@std/assert@^0.218.2" + ] + }, + "@std/path@0.223.0": { + "integrity": "593963402d7e6597f5a6e620931661053572c982fc014000459edc1f93cc3989", + "dependencies": [ + "jsr:@std/assert@^0.223.0" + ] + } + }, + "npm": { + "compromise@14.14.0": { + "integrity": "sha512-0plNVaC0bGWZY3TeijP1xndyaAmjqBY1SSfsc/7ruVLTIfRCSHFLSd/uuK7ZTcaT/4y2u0lVn0et4tmmC98mRg==", + "dependencies": { + "efrt": "efrt@2.7.0", + "grad-school": "grad-school@0.0.5", + "suffix-thumb": "suffix-thumb@5.0.2" + } + }, + "efrt@2.7.0": { + "integrity": "sha512-/RInbCy1d4P6Zdfa+TMVsf/ufZVotat5hCw3QXmWtjU+3pFEOvOQ7ibo3aIxyCJw2leIeAMjmPj+1SLJiCpdrQ==", + "dependencies": {} + }, + "grad-school@0.0.5": { + "integrity": "sha512-rXunEHF9M9EkMydTBux7+IryYXEZinRk6g8OBOGDBzo/qWJjhTxy86i5q7lQYpCLHN8Sqv1XX3OIOc7ka2gtvQ==", + "dependencies": {} + }, + "suffix-thumb@5.0.2": { + "integrity": "sha512-I5PWXAFKx3FYnI9a+dQMWNqTxoRt6vdBdb0O+BJ1sxXCWtSoQCusc13E58f+9p4MYx/qCnEMkD5jac6K2j3dgA==", + "dependencies": {} + } + } + }, + "remote": { + "https://deno.land/x/esbuild@v0.20.2/wasm.js": "5a887c1e38ad1056af11c58d45b6084d33bd33a62afa480d805801739370eed0", + "https://deno.land/x/esbuild@v0.23.0/wasm.js": "3ae25d5ac47af19b2aff2801107aa848339a3ee04431a44b73e93a0991fa5160" + }, + "workspace": { + "dependencies": [ + "jsr:@deno/emit@0.44.0", + "npm:compromise@14.14.0" + ] + } +} diff --git a/dev-deps.ts b/dev-deps.ts deleted file mode 100644 index e876059..0000000 --- a/dev-deps.ts +++ /dev/null @@ -1,2 +0,0 @@ -export * as emit from "https://deno.land/x/emit@0.35.0/mod.ts"; -export * as debounce from "https://deno.land/std@0.213.0/async/debounce.ts"; diff --git a/dictionary/build.ts b/dictionary/build.ts new file mode 100644 index 0000000..2af0e1b 
--- /dev/null +++ b/dictionary/build.ts @@ -0,0 +1,521 @@ +import nlp from "compromise/three"; +import { + Adjective, + AdjectiveType, + Definition, + Determiner, + DeterminerType, + Dictionary, + Noun, +} from "./type.ts"; +import { + all, + choiceOnlyOne, + eol, + match, + optionalAll, + Parser, + sequence, +} from "../src/parser-lib.ts"; +import { OutputError } from "../src/output.ts"; +import { UnrecognizedError } from "../src/error.ts"; +import { nullableAsArray, repeat } from "../src/misc.ts"; + +const SOURCE = new URL("./dictionary", import.meta.url); +const DESTINATION = new URL("./dictionary.ts", import.meta.url); + +function space(): Parser { + return all( + choiceOnlyOne(match(/\s/, "space"), match(/#[^\n]*/, "comment")), + ) + .map((_) => null); +} +function lex(parser: Parser): Parser { + return parser.skip(space()); +} +function word(): Parser { + return all( + choiceOnlyOne( + match(/`([^`]*)`/, "quoted words").map(([_, words]) => words), + match(/#[^\n]*/, "comment").map((_) => ""), + match(/[^():;#/`]/, "word").map(([character]) => character), + ), + ) + .map((word) => word.join("").replaceAll(/\s+/g, " ").trim()) + .filter((word) => word.length > 0); +} +function slash(): Parser { + return lex(match(/\//, "slash")).map((_) => null); +} +function forms(): Parser> { + return sequence(word(), all(slash().with(word()))) + .map(([first, rest]) => [first, ...rest]); +} +function keyword(keyword: T): Parser { + return lex(match(/[a-z]+/, keyword)) + .map(([keyword]) => keyword) + .filter((that) => that === keyword) as Parser; +} +function number(): Parser<"singular" | "plural"> { + return choiceOnlyOne(keyword("singular"), keyword("plural")); +} +function optionalNumber(): Parser { + return optionalAll(number()); +} +function tag(parser: Parser): Parser { + return lex(match(/\(/, "open parenthesis")) + .with(parser) + .skip(lex(match(/\)/, "open parenthesis"))); +} +function template(parser: Parser): Parser { + return lex(match(/\[/, "open parenthesis")) 
+ .with(parser) + .skip(lex(match(/\]/, "open parenthesis"))); +} +function simpleUnit(kind: string): Parser { + return word().skip(tag(keyword(kind))); +} +function conjugate(verb: string): { + presentSingular: string; + presentPlural: string; + past: string; +} { + const sentence = nlp(verb); + sentence.tag("Verb"); + const conjugations = sentence.verbs().conjugate()[0] as undefined | { + Infinitive: string; + PastTense: string; + PresentTense: string; + Gerund: string; + FutureTense: string; + }; + if (conjugations == null) { + throw new OutputError(`no verb conjugation found for ${verb}`); + } + return { + presentSingular: conjugations.PresentTense, + presentPlural: conjugations.Infinitive, + past: conjugations.PastTense, + }; +} +function detectRepetition( + source: Array, +): { before: string; repeat: string; after: string } { + if (source.length === 1) { + return { before: source[0], repeat: "", after: "" }; + } + const [first, ...rest] = source; + if (first.length <= 0) { + throw new UnrecognizedError('no word before "/"'); + } + for (let i = 0; i < first.length; i++) { + const before = first.slice(0, i); + const repeatString = first.slice(i, i + 1); + const after = first.slice(i + 1); + const passed = [...rest.entries()] + .every(([i, test]) => + test === `${before}${repeat(repeatString, i + 2)}${after}` + ); + if (passed) { + return { before, repeat: repeatString, after }; + } + } + throw new OutputError(`${source} has no repetition pattern found`); +} +function nounOnly(): Parser< + { singular: null | string; plural: null | string; gerund: boolean } +> { + return sequence( + word(), + optionalAll(slash().with(word())), + tag( + keyword("n") + .with(sequence(optionalAll(keyword("gerund")), optionalNumber())), + ), + ) + .map(([first, second, [gerund, number]]) => { + let singular: null | string = null; + let plural: null | string = null; + switch (number) { + case null: { + if (second == null) { + const sentence = nlp(first); + sentence.tag("Noun"); + 
singular = sentence + .nouns() + .toSingular() + .text(); + plural = sentence + .nouns() + .toPlural() + .text(); + if (singular === "" || plural === "") { + throw new OutputError( + `no singular or plural form found for ${first}`, + ); + } + } else { + singular = first; + plural = second; + } + break; + } + case "singular": + case "plural": + if (second != null) { + throw new OutputError( + "number inside tag may not be provided when two forms of noun are already provided", + ); + } + switch (number) { + case "singular": + singular = first; + break; + case "plural": + plural = first; + break; + } + break; + } + return { singular, plural, gerund: gerund != null }; + }); +} +function noun(): Parser { + return sequence( + all(determiner()), + all(adjective()), + nounOnly(), + optionalAll( + sequence(simpleUnit("adj"), word()) + .skip(tag(sequence(keyword("proper"), keyword("n")))), + ), + ) + .map(([determiner, adjective, noun, post]) => { + let postAdjective: null | { adjective: string; name: string }; + if (post == null) { + postAdjective = null; + } else { + postAdjective = { adjective: post[0], name: post[1] }; + } + return { + determiner, + adjective, + ...noun, + postAdjective, + }; + }); +} +function determinerType(): Parser { + return choiceOnlyOne( + keyword("article"), + keyword("demonstrative"), + keyword("distributive"), + keyword("interrogative"), + keyword("possessive"), + keyword("quantifier"), + keyword("negative"), + ); +} +function adjectiveKind(): Parser { + return choiceOnlyOne( + keyword("opinion"), + keyword("size"), + sequence(keyword("physical"), keyword("quality")) + .map((_) => "physical quality"), + keyword("age"), + keyword("color"), + keyword("origin"), + keyword("material"), + keyword("qualifier"), + ); +} +function determiner(): Parser { + return sequence( + word(), + optionalAll(slash().with(word())), + tag(keyword("d").with(sequence(determinerType(), optionalNumber()))), + ) + .map(([determiner, plural, [kind, number]]) => ({ + 
determiner, + plural, + kind, + number: number ?? "both", + })); +} +function adjective(): Parser { + return sequence( + all(simpleUnit("adv")), + word(), + tag(keyword("adj").with(adjectiveKind())), + ) + .map(([adverb, adjective, kind]) => ({ adverb, adjective, kind })); +} +function semicolon(): Parser { + return lex(match(/;/, "semicolon")).map((_) => null); +} +function definition(): Parser { + return choiceOnlyOne( + forms().skip(tag(keyword("f"))) + .skip(semicolon()) + .map((unit) => + ({ + type: "filler", + ...detectRepetition(unit), + }) as Definition + ), + word().skip(tag(sequence(keyword("particle"), keyword("def")))) + .skip(semicolon()) + .map((definition) => + ({ type: "particle definition", definition }) as Definition + ), + noun() + .skip(semicolon()) + .map((noun) => ({ type: "noun", ...noun }) as Definition), + sequence(noun(), simpleUnit("prep")) + .skip(template(keyword("headword"))) + .skip(semicolon()) + .map(([noun, preposition]) => + ({ + type: "noun preposition", + noun, + preposition, + }) as Definition + ), + sequence( + word(), + slash().with(word()), + slash().with(word()), + slash().with(word()), + ) + .skip(tag(sequence(keyword("personal"), keyword("pronoun")))) + .skip(semicolon()) + .map(([singularSubject, singularObject, pluralSubject, pluralObject]) => + ({ + type: "personal pronoun", + singular: { subject: singularSubject, object: singularObject }, + plural: { subject: pluralSubject, object: pluralObject }, + }) as Definition + ), + sequence( + word(), + slash().with(word()), + tag(keyword("personal").with(keyword("pronoun")).with(number())), + ) + .skip(semicolon()) + .map(([subject, object, number]) => + ({ + type: "personal pronoun", + singular: null, + plural: null, + [number]: { subject, object }, + }) as Definition + ), + determiner() + .skip(semicolon()) + .map((determiner) => + ({ type: "determiner", ...determiner }) as Definition + ), + simpleUnit("num") + .skip(semicolon()) + .map((unit) => { + const numeral = 
Number.parseInt(unit); + if (Number.isNaN(numeral)) { + throw new UnrecognizedError("non-number on numeral"); + } else { + return { type: "numeral", numeral } as Definition; + } + }), + adjective() + .skip(semicolon()) + .map((adjective) => ({ type: "adjective", ...adjective }) as Definition), + sequence( + adjective(), + simpleUnit("c").filter((word) => word === "and").with(adjective()), + ) + .filter(([first, second]) => + first.adverb.length === 0 && second.adverb.length === 0 + ) + .skip(semicolon()) + .map((adjective) => + ({ type: "compound adjective", adjective }) as Definition + ), + simpleUnit("adv") + .skip(semicolon()) + .map((adverb) => ({ type: "adverb", adverb }) as Definition), + sequence( + simpleUnit("v"), + optionalAll(template(keyword("object"))), + optionalAll( + sequence(simpleUnit("prep"), noun()) + .map(([preposition, object]) => ({ preposition, object })), + ) + .map(nullableAsArray), + ) + .skip(semicolon()) + .map(([verb, forObject, indirectObject]) => + ({ + type: "verb", + ...conjugate(verb), + directObject: null, + indirectObject, + forObject: forObject != null, + }) as Definition + ), + sequence( + simpleUnit("v"), + optionalAll(noun()), + optionalAll(simpleUnit("prep").skip(template(keyword("object")))), + ) + .skip(semicolon()) + .map(([verb, directObject, preposition]) => + ({ + type: "verb", + ...conjugate(verb), + directObject, + indirectObject: [], + forObject: preposition ?? 
false, + }) as Definition + ), + sequence(simpleUnit("v"), optionalAll(simpleUnit("particle"))) + .skip(template(sequence(keyword("predicate"), keyword("v")))) + .skip(semicolon()) + .map(([verb, particle]) => + ({ + type: "preverb as finite verb", + ...conjugate(verb), + particle, + }) as Definition + ), + word() + .skip(tag(sequence(keyword("linking"), keyword("v")))) + .skip(template(keyword("predicate"))) + .skip(semicolon()).map((linkingVerb) => + ({ + type: "preverb as linking verb", + linkingVerb, + }) as Definition + ), + word() + .skip(tag(sequence(keyword("modal"), keyword("v")))) + .skip(template(keyword("predicate"))) + .skip(semicolon()).map((verb) => + ({ + type: "preverb as modal verb", + verb, + }) as Definition + ), + simpleUnit("prep") + .skip(semicolon()) + .map((preposition) => + ({ type: "preposition", preposition }) as Definition + ), + simpleUnit("i") + .skip(semicolon()) + .map((preposition) => + ({ type: "interjection", interjection: preposition }) as Definition + ), + ); +} +function singleWord(): Parser { /* Lexes one run of lowercase ASCII letters and yields it as a string. */ + return lex(match(/[a-z]+/, "word")).map(([word]) => word); +} +function head(): Parser> { /* Parses an entry head: words separated by commas and closed by a colon; yields all the words as one array. */ + return sequence( + all(singleWord().skip(lex(match(/,/, "comma")))), + singleWord(), + ) + .skip(lex(match(/:/, "colon"))) + .map(([init, last]) => [...init, last]); +} +const dictionary = space() /* Whole-file parser: a sequence of entries, each a head followed by its definitions. */ + .with(all(sequence(head(), all(definition())))) + .skip(eol()) + .map((entries) => { + const dictionary: Dictionary = {}; + for (const [words, definitions] of entries) { + for (const word of words) { + dictionary[word] = definitions; /* every head word of an entry shares the same definitions array; a later entry for the same word replaces the earlier one */ + } + } + return dictionary; + }); +const insideDefinitionParser = space().with(definition()).skip(eol()); /* parses a lone definition; used below to report errors per definition */ + +export async function buildDictionary(): Promise { /* Reads SOURCE, parses it with the dictionary parser and, on success, writes the result to DESTINATION as a TypeScript module. Resolves to true on success; on a parse error it logs the errors per definition and resolves to false. */ + const sourceText = await Deno.readTextFile(SOURCE); + const output = dictionary.parse(sourceText); + if (output.isError()) { + const rawTexts = space() /* fallback splitter: an optional head, then any text up to and including the next semicolon */ + .with(all( + optionalAll(head()) + .with( + lex(match(/[^;]*;/, "definition")) + .map(([definition]) => 
definition), + ), + )) + .skip(eol()) + .parse(sourceText); + for (const text of rawTexts.output[0]) { /* re-parse every raw chunk alone so that errors are reported per definition */ + const errors = insideDefinitionParser.parse(text).errors; + if (errors.length > 0) { + console.error(`error with definition ${text}`); + for (const error of errors) { + console.error(error.message); + } + console.error(); + } + } + return false; + } else { + const dictionary = output.output[0]; + const contentWords = Object /* entries having at least one definition that is neither filler nor particle definition */ + .entries(dictionary) + .filter(([_, definitions]) => + definitions.some((definition) => + definition.type !== "filler" && + definition.type !== "particle definition" + ) + ); + const noNouns = contentWords /* content words with no noun, personal pronoun or numeral definition; these only produce a warning */ + .filter(([_, definitions]) => + definitions.every((definition) => + definition.type !== "noun" && + definition.type !== "personal pronoun" && + definition.type !== "numeral" + ) + ) + .map(([word]) => word); + if (noNouns.length > 0) { + console.warn("the following doesn't have noun nor pronoun definition"); + for (const word of noNouns) { + console.warn(word); + } + console.warn(); + } + const noAdjectives = contentWords /* content words with no adjective, compound adjective, determiner or numeral definition; these only produce a warning */ + .filter(([_, definitions]) => + definitions.every((definition) => + definition.type !== "adjective" && + definition.type !== "compound adjective" && + definition.type !== "determiner" && + definition.type !== "numeral" + ) + ) + .map(([word]) => word); + if (noAdjectives.length > 0) { + console.warn( + "the following doesn't have adjective nor determiner definition", + ); + for (const word of noAdjectives) { + console.warn(word); + } + console.warn(); + } + const string = JSON.stringify(output.output[0]); + await Deno.writeTextFile( /* emit the parsed dictionary as a TypeScript module exporting DICTIONARY */ + DESTINATION, + `import{Dictionary}from"./type.ts";export const DICTIONARY:Dictionary=${string}`, + ); + return true; + } +} diff --git a/dictionary/dictionary b/dictionary/dictionary new file mode 100644 index 0000000..41f298c --- /dev/null +++ b/dictionary/dictionary @@ -0,0 +1,1640 @@ +# https://github.com/ilo-token/ilo-token.github.io/wiki/Guidelines-for-editing-dictionary + +a: + 
ah/aah/aaah(f); + # oh/ohh/ohhh(f); + # ha/haa/haaa(f); + # eh/ehh/ehhh(f); + # um/umm/ummm(f); + # oy/oyy/oyyy(f); + + `[placed after something for emphasis or emotion]`(particle def); + +akesi: + reptile(n); + amphibian(n); + scaly(adj material) creature(n); + crawling(adj opinion) creature(n); + + reptilian(adj qualifier); + amphibian(adj qualifier); + +ala: + not(d negative); + not(adv); + nothing(n plural); + no(d quantifier plural); + + `[negates a word or phrase]`(particle def); + `[forms a yes-no question]`(particle def); + + 0(num); + +alasa: + # hunt(v) [object]; + search(v) [object]; + + try(v) to(particle) [predicate v]; + + # hunting(n gerund); + searching(n gerund); + + # hunting(adj qualifier); + searching(adj qualifier); + +ale, ali: + all(d quantifier plural); + # every(d distributive); + everything(n singular); + # entirety(n singular); + universe(n singular); + + 100(num); + + completely(adv); + +anpa: + # bottom(n singular); + underside(n singular); + + defeated(adj opinion); + humble(adj opinion); + lowly(adj opinion); + + defeat(v) [object]; + +ante: + different(adj opinion); + # altered(adj opinion); + + # alter(v) [object]; + # modify(v) [object]; + change(v) [object]; + + other(adj origin); + other(n); + + difference(n); + +anu: + `[separates multiple possibilities, replacing another particle]`(particle def); + or(particle def); + + # # These are no longer in Linku + # choose(v) [object]; + # decide(v); + + # choosing(n gerund); + # deciding(n gerund); + +awen: + # stay(v); + # remain(v); + # wait(v); + pause(v) [object]; + + protect(v) [object]; + # keep safe(v); + + continue(v); + + continue(v) to(particle) [predicate v]; + + # staying(n gerund); + # remaining(n gerund); + # waiting(n gerund); + pausing(n gerund); + protecting(n gerund); + continuing(n gerund); + + # staying(adj qualifier); + # remaining(adj qualifier); + # waiting(adj qualifier); + pausing(adj qualifier); + protecting(adj qualifier); + continuing(adj qualifier); + +e: + 
`[marks the start of a direct object]`(particle def); + +en: + `[separates multiple subjects]`(particle def); + +epiku: + # epic(adj opinion); + # cool(adj opinion); # Could be conflated with "cool" as in lete + awesome(adj opinion); + # amazing(adj opinion); + + # coolness(n); + awesomeness(n); + +esun: + # trade(v) [object]; + # barter(v) [object]; + # exchange(v) [object]; + swap(v) [object]; + buy(v) [object]; + sell(v) [object]; + + market(n); + # shop(n); + # fair(n); + # bazaar(n); + business(adj qualifier) place(n); + + # trading(n gerund); + # bartering(n gerund); + # exchanging(n gerund); + swapping(n gerund); + buying(n gerund); + selling(n gerund); + + # trading(adj qualifier); + # bartering(adj qualifier); + # exchanging(adj qualifier); + swapping(adj qualifier); + buying(adj qualifier); + selling(adj qualifier); + +ijo: + # thing(n); + # object(n); + entity(n); + being(n); + matter(n); + phenomenon(n); + +ike: + negative(adj opinion) quality(n); + + bad(adj opinion); + # unpleasant(adj opinion); + # harmful(adj opinion); + # unneeded(adj opinion); + + badly(adv); + # unpleasantly(adv); + # harmfully(adv); + +ilo: + tool(n); + # implement(n); + # machine(n); + # device(n); + + tool-related(adj qualifier); + # implement-related(adj qualifier); + # machine-related(adj qualifier); + # device-related(adj qualifier); + +insa: + inside(adj origin) part(n); + # center(n singular); + middle(n); + midpoint(n); + + internal(adj origin); + +jaki: + disgusting(adj opinion); + unclean(adj opinion); + # unsanitary(adj opinion); + # toxic(adj opinion); + repulsive(adj opinion); + # rotten(adj opinion); + + grossness(n); + # toxicity(n); + # rottenness(n); + +jan: + person(n); + somebody(n singular); + human being(n); + + human-like(adj qualifier); + # person-like(adj qualifier); + + human-related(adj qualifier); + # person-related(adj qualifier); + +jasima: + reflect(v) [object]; + # echo(v) [object]; + # mirror(v) [object]; + duplicate(v) [object]; + + reflecting(n 
gerund); + # echoing(n gerund); + # mirroring(n gerund); + duplicating(n gerund); + + reflecting(adj qualifier); + # echoing(adj qualifier); + # mirroring(adj qualifier); + duplicating(adj qualifier); + +jelo: + yellow(adj color); + # golden(adj color); + # lime(adv) yellow(adj color); + # yellowish(adv) orange(adj color); + + yellow(n singular); + # lime(adj color) yellow(n singular); + # yellowish(adj color) orange(n singular); + +jo: + # hold(v) [object]; + carry(v) [object]; + contain(v) [object]; + own(v) [object]; + + # holding(n gerund); + carrying(n gerund); + containing(n gerund); + owning(n gerund); + +kala: + fish(n); + # marine(adj origin) animal(n); + # sea(adj qualifier) creature(n); + swimming(adj qualifier) creature(n); + + swim(v); + + fish-like(adj qualifier); + fish-related(adj qualifier); + + swimming(adj qualifier); + +kalama: + produce(v) sound(n); + sound(n); + + sound-related(adj qualifier); + +kama: + arrive(v); + # approach(v); + summon(v) [object]; + + future(adj age); + + become(linking v) [predicate]; + begin(v) to(particle) [predicate v]; + + arriving(n gerund); + # approaching(n gerund); + summoning(n gerund); + becoming(n gerund); + + arriving(adj qualifier); + # approaching(adj qualifier); + summoning(adj qualifier); + becoming(adj qualifier); + +kasi: + plant(n); + # vegetation(n); + # herb(n); + leaf(n); + + plant-like(adj qualifier); + # vegetation-like(adj qualifier); + # herb-like(adj qualifier); + leaf-like(adj qualifier); + + plant-related(adj qualifier); + # vegetation-related(adj qualifier); + # herb-related(adj qualifier); + leaf-related(adj qualifier); + +ken: + can(modal v) [predicate]; + may(modal v) [predicate]; + + ability(n); + permission(n); + possibility(n); + + allow(v) [object]; + enable(v) [object]; + +kepeken: + using(prep); + by means of(prep); + +kijetesantakalu: + raccoon(n); + # kinkajou(n); + procyonid(n); + musteloid(n); + + procyonid(adj qualifier); + musteloid(adj qualifier); + + raccoon-like(adj 
qualifier); + # kinkajou-like(adj qualifier); + + raccoon-related(adj qualifier); + # kinkajou-related(adj qualifier); + +kili: + fruit(n); + vegetable(n); + mushroom(n); + + fruit-like(adj qualifier); + vegetable-like(adj qualifier); + mushroom-like(adj qualifier); + + fruit-related(adj qualifier); + vegetable-related(adj qualifier); + mushroom-related(adj qualifier); + +kin: + `[at sentence start]`(particle def); + also(adv); + additionally(adv); + +kipisi: + # split(v) [object]; + cut(v) [object]; + # slice(v) [object]; + + piece(n) of(prep) [headword]; + part(n) of(prep) [headword]; + + sharp(adj physical quality); + pointy(adj physical quality); + +kiwen: + hard(adj material) object(n); + # metal(n); + # rock(n); + # stone(n); + + hard(adj material); + +ko: + semi-solid(n singular); + # paste(n singular); + powder(n singular); + # goo(n singular); + # sand(n singular); + # soil(n singular); + # clay(n singular); + + squishy(adj material); + moldable(adj material); + + sticky(adj material); + +kokosila: + speak(v) a(d article singular) non-Toki Pona(adj qualifier) language(n singular); + +kon: + air(n); + # breath(n); + # wind(n); + + essence(n); + # spirit(n); + soul(n); + # ghost(n); + + unseen(adj physical quality) agent(n); + + air-like(adj qualifier); + # breath-like(adj qualifier); + # wind-like(adj qualifier); + essence-like(adj qualifier); + # spirit-like(adj qualifier); + soul-like(adj qualifier); + # ghost-like(adj qualifier); + + air-related(adj qualifier); + # breath-related(adj qualifier); + # wind-related(adj qualifier); + essence-related(adj qualifier); + # spirit-related(adj qualifier); + soul-related(adj qualifier); + # ghost-related(adj qualifier); + +ku: + interact(v) with(prep) the(d article) Toki Pona(adj qualifier) Dictionary(n singular); + the(d article) Toki Pona(adj qualifier) Dictionary(n); + +kule: + color(n); + # pigment(n); + category(n); + # genre(n); + flavor(n); + + colorful(adj color); + diverse(adj qualifier); + +kulupu: + 
group(n); + # community(n); + # society(n); + # company(n); + # nation(n); + # collection(n); + # team(n); + # crowd(n); + + group-related(adj qualifier); + # community-related(adj qualifier); + # society-related(adj qualifier); + # company-related(adj qualifier); + # nation-related(adj qualifier); + # collection-related(adj qualifier); + # team-related(adj qualifier); + # crowd-related(adj qualifier); + +kute: + ear(n); + hearing(adj qualifier) organ(n); + + hear(v) [object]; + listen(v) to(prep) [object]; + pay(v) attention(n) to(prep) [object]; + obey(v) [object]; + + hearing(n gerund); + listening(n gerund); + obeying(n gerund); + + hearing(adj qualifier); + listening(adj qualifier); + obeying(adj qualifier); + +la: + `[mark the previous statement as context to a following statement]`(particle def); + +lanpan: + take(v) [object]; + # seize(v) [object]; + # steal(v) [object]; + + taking(n gerund); + # seizing(n gerund); + # stealing(n gerund); + + taking(adj qualifier); + # seizing(adj qualifier); + # stealing(adj qualifier); + +lape: + sleep(n singular); + rest(n singular); + sleep(v); + rest(v); + + sleeping(adj qualifier); + resting(adj qualifier); + +laso: + # turquoise(adj color); + blue(adj color); + green(adj color); + cyan(adj color); + # indigo(adj color); + # lime(adv) green(adj color); + + # turquoise(n singular); + blue(n singular); + green(n singular); + cyan(n singular); + # indigo(n singular); + # lime(adj color) green(n singular); + +lawa: + head(n); + mind(n); + brain(n); + + control(v) [object]; + lead(v) [object]; + guide(v) [object]; + + # government(n); + leader(n); + + rule(n); + law(n); + + head-related(adj qualifier); + mind-related(adj qualifier); + brain-related(adj qualifier); + # government-related(adj qualifier); + leader-related(adj qualifier); + rule-related(adj qualifier); + law-related(adj qualifier); + +leko: + square(n); + # cube(n); + block(n); + blocky(adj physical quality) object(n); + # bricks(n plural); + # stairs(n plural); + + 
square(adj physical quality); + cuboid(adj physical quality); + blocky(adj physical quality); + +len: + # cloth(n); + # clothing(n); + fabric(n); + # textile(n); + + # covered(adj origin); + hidden(adj origin); + secret(adj opinion); + private(adj opinion); + + cover(v) [object]; + hide(v) [object]; + +lete: + cold(adj material); + # cool(adj material); # Could be conflated with "cool" as in pona + # frozen(adj material); + + freeze(v) [object]; + # chill(v) [object]; + + # raw(adj opinion); + # uncooked(adj opinion); + + coldness(n); + # coolness(n); + +li: + `[marks the start of an indicative verb (statement)]`(particle def); + +lili: + small(adj size); + short(adj size); + young(adj age); + + few(d quantifier plural); + + piece(n) of(prep) [headword]; + part(n) of(prep) [headword]; + + smallness(n); + shortness(n); + + slightly(adv); + +linja: + long(adj size) flexible(adj material) thing(n); + # rope(n); + # yarn(n); + # hair(n); + # fur(n); + # line(n); + # strand(n); + + long(adj size) and(c) flexible(adj material); + +lipu: + flat(adj physical quality) bendable(adj material) object(n); + paper(n); + # card(n); + # leaf(n); + + written(adj qualifier) text(n); + # document(n); + # book(n); + website(n); + # clay(adj qualifier) tablet(n); + + flat(adj size) and(c) bendable(adj material); + +loje: + red(adj color); + # magenta(adj color); + # scarlet(adj color); + # pink(adj color); + # rust-colored(adj color); + # reddish(adv) orange(adj color); + + red(n singular); + # magenta(n singular); + # scarlet(n singular); + # pink(n singular); + # rust-color(n singular); + # reddish(adj color) orange(n singular); + +lon: + present(adj origin); + existing(adj opinion); + real(adj opinion); + true(adj opinion); + + located at(prep); + in(prep); + during(prep); + in the context of(prep); + + truth(n singular); + + truthfully(adv); + +luka: + hand(n); + arm(n); + tactile(adj qualifier) organ(n); + grasping(adj qualifier) organ(n); + + grasp(v) [object]; + interact(v) 
with(prep) [object]; + feel(v) [object] using(prep) touch(n singular); + + 5(num); + + hand-like(adj physical quality); + arm-like(adj physical quality); + + hand-related(adj qualifier); + arm-related(adj qualifier); + +lukin, oko: + see(v) [object]; + look(v) at(prep) [object]; + # view(v) at(prep) [object]; + examine(v) [object]; + read(v) [object]; + watch(v) [object]; + + visual(adj qualifier); + + eye(n); + seeing(adj qualifier) organ(n); + + try(v) to(particle) [predicate v]; + +lupa: + hole(n); + # pit(n); + # cave(n); + # doorway(n); + # window(n); + # portal(n); + + # TODO adjectives + +ma: + earth(n singular); + land(n singular); + soil(n singular); + + # country(n); + territory(n); + world(n singular); + + outdoor(n); + + earth-related(adj qualifier); + land-related(adj qualifier); + soil-related(adj qualifier); + # country-related(adj qualifier); + territory-related(adj qualifier); + world-related(adj qualifier); + outdoor-related(adj qualifier); + +majuna: + old(adj age); + # aged(adj age); + ancient(adj age); + + oldness(n); + +mama: + parent(n); + ancestor(n); + + creator(n); + # originator(n); + + caretaker(n); + sustainer(n); + guardian(n); + + parent-related(adj qualifier); + ancestor-related(adj qualifier); + creator-related(adj qualifier); + # originator-related(adj qualifier); + caretaker-related(adj qualifier); + sustainer-related(adj qualifier); + guardian-related(adj qualifier); + +mani: + money(n singular); + # currency(n); + valuable(adj opinion) thing(n); + + # gold(n); + investment(n); + livestock(n); + + valuable(adj opinion); + +meli: + woman(n); + feminine(adj opinion); + wife(n); + girlfriend(n); + +meso: + midpoint(n); + medium(adj size); + mediocre(adj opinion); + +mi: + I/me/we/us(personal pronoun); + my/our(d possessive); + +mije: + man(n); + masculine(adj qualifier); + husband(n); + boyfriend(n); + +misikeke: + medical(adj qualifier) item(n); + # prescription(n); + meditation(n); + exercise(n); + # bandage(n); + therapy(n); + + 
medical(adj qualifier); + +moku: + eat(v) [object]; + drink(v) [object]; + consume(v) [object]; + # swallow(v) [object]; + # ingest(v) [object]; + + food(n); + edible(adj opinion) thing(n); + + food-related(adj qualifier); + +moli: + death(n singular); + dead(adj age); + die(v); + # dying(n); + kill(v) [object]; + # murder(v) [object]; + +monsi: + back(n singular); + rear(n singular); + + back-related(adj qualifier); + rear-related(adj qualifier); + +monsuta: + fear(n); + # nervousness(n); + dread(n); + + scary(adj opinion); + # frightening(adj opinion); + + scary(adj opinion) thing(n); + # predator(n); + # threat(n); + danger(n); + +mu: + bark(v); + cough(v); + roar(v); + hum(v); + quack(v); + hiss(v); + buzz(v); + growl(v); + yawn(v); + screech(v); + chirp(v); + gobble(v); + purr(v); + honk(v); + burp(v); + chomp(v); + + bark(i); + cough(i); + roar(i); + hum(i); + ow(i); + quack(i); + hiss(i); + buzz(i); + growl(i); + yawn(i); + woof(i); + screech(i); + chirp(i); + hoot(i); + moo(i); + hiccup(i); + gobble(i); + purr(i); + baa(i); + honk(i); + tweet(i); + ouch(i); + meow(i); + burp(i); + chomp(i); + ribbit(i); + achoo(i); + +mun: + # moon(n singular); + night sky(adj origin) object(n); + # star(n); + celestial(adj origin) body(n); + + celestial(adj origin); + + moon-related(adj qualifier); + star-related(adj qualifier); + +musi: + # fun(n singular); + game(n); + entertainment(n); + + art(n); + fun(adj opinion); + amusing(adj opinion); + # interesting(adj opinion); + comical(adj opinion); + # silly(adj opinion); + + have(v) fun(n singular); + +mute: + many(d quantifier plural); + # several(d quantifier plural); + very(adv); + 20(num); + + quantity(n); + +n: + hm/hmm/hmmm(f); + # uh/uhh/uhhh(f); + # mm/mmm/mmmm(f); + # er/err/errr(f); + # um/umm/ummm(f); + `[indicate thinking or pause]`(particle def); + +namako: + spice(n); + ornament(n); + # adornment(n); + + extra(adj opinion); + additional(adj opinion); + +nanpa: + number(n); + -th(particle def); + `[ordinal 
number]`(particle def); + + numeric(adj opinion); + +nasa: + unusual(adj opinion); + # strange(adj opinion); + silly(adj opinion); + # drunk(adj opinion); + intoxicated(adj opinion); + + intoxicate(v) [object]; + + strangely(adv); + + silliness(n singular); + strangeness(n singular); + +nasin: + method(n); + doctrine(n); + tradition(n); + + path(n); + # road(n); + # way(n); + + method-related(adj qualifier); + doctrine-related(adj qualifier); + tradition-related(adj qualifier); + path-related(adj qualifier); + # road-related(adj qualifier); + # way-related(adj qualifier); + +nena: + protuberance(n); + bump(n); + button(n); + hill(n); + nose(n); + + protuberance-related(adj qualifier); + bump-related(adj qualifier); + button-related(adj qualifier); + hill-related(adj qualifier); + nose-related(adj qualifier); + +ni: + this/these(n); + that/those(n); + this/these(d demonstrative); + that/those(d demonstrative); + +nimi: + word(n); + name(n); + + word-related(adj qualifier); + name-related(adj qualifier); + +noka: + foot(n); + leg(n); + locomotive(adj qualifier) organ(n); + root(n); + + foot-like(adj physical quality); + leg-like(adj physical quality); + root-like(adj physical quality); + + foot-related(adj qualifier); + leg-related(adj qualifier); + root-related(adj qualifier); + +o: + `[marks the end of a vocative (who is being spoken to)]`(particle def); + `[marks the start of an imperative (command, wish, instruction)]`(particle def); + should(particle def); + +olin: + have(v) strong(adj opinion) emotional(adj opinion) bond(n singular) with(prep) [object]; + respect(v) [object]; + + affection(n singular); + appreciation(n singular); + respect(n singular); + relationship(n); + platonic(adj qualifier); + romantic(adj qualifier); + familial(adj qualifier); + +ona: + they/them(personal pronoun plural); + it/it(personal pronoun singular); + their(d possessive); + its(d possessive); + +open: + # begin(v); + start(v); + open(v) [object]; + turn on(v) [object]; + + 
beginning(n); + + beginning-related(adj qualifier); + +pakala: + damaged(adj opinion); + # broken(adj opinion); + # botched(adj opinion); + # harmed(adj opinion); + # messed up(adj opinion); + + mistake(n); + + damage(v) [object]; + # break(v) [object]; + # botch(v) [object]; + # harm(v) [object]; + # mess up(v) [object]; + +pali: + # work(n); + activity(n); + + create(v) [object]; + # build(v) [object]; + design(v) [object]; + + put(v) effort(n) toward(prep) [object]; + take(v) action(n) to(prep) [object]; + + # work-related(adj qualifier); + activity-related(adj qualifier); + +palisa: + long(adj size) hard(adj material) thing(n); + # branch(n); + # pole(n); + # rod(n); + # stick(n); + # spine(n); + # mast(n); + + long(adj size) and(c) hard(adj material); + +pan: + grain(n); + starchy(adj material) food(n); + baked(adj qualifier) goods(n plural); + + # rice(n); + # sorghum(n); + # bread(n); + # noodle(n); + # masa(n); + # porridge(n); + # injera(n); + + starchy(adj material); + grain-related(adj qualifier); + +pana: + give(v) [object]; + # send(v) [object]; + emit(v) [object]; + provide(v) [object]; + put(v) [object]; + release(v) [object]; + + giving(n gerund); + # sending(n gerund); + emitting(n gerund); + providing(n gerund); + putting(n gerund); + releasing(n gerund); + + giving(adj qualifier); + # sending(adj qualifier); + emitting(adj qualifier); + providing(adj qualifier); + putting(adj qualifier); + releasing(adj qualifier); + +pi: + `[modify the next word with one or more following words]`(particle def); + +pilin: + experience(n); + emotion(n); + # feeling(n); + touch(n); + + feel(v) [object]; + touch(v) [object]; + + heart(n); + + experience-related(adj qualifier); + emotion-related(adj qualifier); + # feeling-related(adj qualifier); + touch-related(adj qualifier); + heart-related(adj qualifier); + +pimeja: + dark(adj color); + unlit(adj color); + + dark(adj color) color(n); + # black(adj color); + # purple(adj color); + # brown(adj color); + + 
darkness(n singular); + +pini: + finish(v) [object]; + stop(v) [object]; + prevent(v) [object]; + + close(v) [object]; + disable(v) [object]; + turn off(v) [object]; + + ended(adj age); + past(adj age); + + edge(n); + end(n); + conclusion(n); + +pipi: + insect(n); + bug(n); + # spider(n); + tiny(adj size) crawling(adj qualifier) creature(n); + + insect-like(adj physical quality); + bug-like(adj physical quality); + + insect-related(adj qualifier); + bug-related(adj qualifier); + +poka: + hip(n); + side(n); + vicinity(n); + nearby(adj origin); + +poki: + container(n); + # bag(n); + # bowl(n); + # box(n); + # cup(n); + # cupboard(n); + # drawer(n); + # folder(n); + + container-like(adj physical quality); + container-related(adj qualifier); + +pona: + positive(adj opinion) quality(n); + + good(adj opinion); + # pleasant(adj opinion); + # helpful(adj opinion); + # friendly(adj opinion); + # useful(adj opinion); + # peaceful(adj opinion); + + nicely(adv); + +pu: + interact(v) with(prep) the(d article) book(n singular) titled(adj) `Toki Pona: The Language of Good`(proper n); + the(d article) book(n) titled(adj) `Toki Pona: The Language of Good`(proper n); + +sama: + same(adj opinion); + # similar(adj opinion); + # alike(adj opinion); + + fellow(adj opinion); + + similar to(prep); + # same as(prep); + + similarity(n); + +seli: + hot(adj material); + warm(adj material); + + heat(n); + fire(n); + # flame(n); + + burn(v) [object]; + +selo: + outer(adj origin) layer(n); + # skin(n singular); + # peel(n singular); + # shell(n singular); + # bark(n singular); + + # outer(adj origin) shape(n); + outer(adj origin) form(n); + boundary(n); + + skin-related(adj qualifier); + boundary-related(adj qualifier); + +seme: + `[indicate a question by marking missing info in a sentence]`(particle def); + what/what(n); + which/which(n); + what(d interrogative); + which(d interrogative); + +sewi: + # top(adj origin); + above(adj origin); + highest(adj origin) part(n); + + divine(adj opinion); 
+ sacred(adj opinion); + supernatural(adj opinion); + + awesome(adj opinion); + inspiring(adj opinion); + excelling(adj opinion); + + elevate(v) [object]; + +sijelo: + body(n); + shape(n); + physical(adj qualifier) state(n); + # torso(n); + substance(n); + form(n); + + body-related(adj qualifier); + shape-related(adj qualifier); + # torso-related(adj qualifier); + substance-related(adj qualifier); + form-related(adj qualifier); + +sike: + circle(n); + sphere(n); + spiral(n); + round(adj physical quality) thing(n); + # ball(n); + # wheel(n); + + repeating(adj qualifier) thing(n); + cycle(n); + orbit(n); + loop(n); + + round(adj physical quality); + repeating(adj qualifier); + + repeatedly(adv); + +sin: + new(adj age); + fresh(adj opinion); + + update(v) [object]; + + newness(n singular); + + repeatedly(adv); + newly(adv); + +sina: + you/you(personal pronoun plural); + your(d possessive); + +sinpin: + vertical(adj physical quality) surface(n); + # wall(n); + # board(n); + + front(n); + face(n); + + wall-related(adj qualifier); + # board-related(adj qualifier); + front-related(adj qualifier); + face-related(adj qualifier); + +sitelen: + image(n); + # picture(n); + representation(n); + symbol(n); + # mark(n); + # writing(n); + + image-related(adj qualifier); + # picture-related(adj qualifier); + representation-related(adj qualifier); + symbol-related(adj qualifier); + # mark-related(adj qualifier); + # writing-related(adj qualifier); + +soko: + mushroom(n); + fungus(n); + lichen(n); + + mushroom-like(adj physical quality); + fungus-like(adj physical quality); + lichen-like(adj physical quality); + + mushroom-related(adj qualifier); + fungus-related(adj qualifier); + lichen-related(adj qualifier); + +sona: + # TODO: be skilled in, be wise about + knowledge(n); + information(n); + data(n); + + know(v) [object]; + + know(v) how to(particle) [predicate v]; + + knowledgeable(adj opinion); + + knowledge-related(adj qualifier); + information-related(adj qualifier); + 
data-related(adj qualifier); + +soweli: + fuzzy(adj physical quality) creature(n); + land(adj origin) animal(n); + beast(n); + + animal-like(adj physical quality); + beast-like(adj physical quality); + + animal-related(adj qualifier); + beast-related(adj qualifier); + +su: + interact(v) with(prep) Sonja's(d possessive) story(adj qualifier) book(adj qualifier) series(n singular); + Sonja's(d possessive) story(adj qualifier) book(adj qualifier) series(n); + +suli: + big(adj size); + heavy(adj size); + # large(adj size); + long(adj size); + tall(adj size); + wide(adj size); + + important(adj opinion); + relevant(adj opinion); + + hugeness(n singular); + importance(n singular); + +suno: + # light(v); + shine(v); + # glow(v); + + light(n); + radiance(n); + + # sun(n); + light(adj qualifier) source(n); + # brightness(n); + + light-related(adj qualifier); + +supa: + horizontal(adj physical quality) surface(n); + # bed(n); + # floor(n); + # desk(n); + # plate(n); + # table(n); + # platform(n); + + horizontal(adj physical quality); + +suwi: + sweet(adj material); + fragrant(adj material); + cute(adj opinion); + # adorable(adj opinion); + + sweetness(n); + fragrance(n); + cuteness(n); + +tan: + from(prep); + because of(prep); + + origin(n); + cause(n); + +taso: + only(adj opinion); + only(adv); + `[marks a sentence as qualifying or contradictory]`(particle def); + but(particle def); + however(particle def); + +tawa: + motion(n); + # walk(v); + shake(v) [object]; + fly(v); + travel(v); + + # walking(n gerund); + shaking(n gerund); + flight(n); + travelling(n gerund); + + to(prep); + for(prep); + going to(prep); + from the perspective of(prep); + + moving(adj physical quality); + +telo: + liquid(n singular); + # water(n singular); + # gasoline(n singular); + # soda(n singular); + # lava(n singular); + # soup(n singular); + # oil(n singular); + # ink(n singular); + + liquid(adj material); + +tenpo: + time(n singular); + # event(n); + # situation(n); + # moment(n); + # 
period(n); + # duration(n singular); + + time-related(adj qualifier); + # event-related(adj qualifier); + # situation-related(adj qualifier); + # moment-related(adj qualifier); + # period-related(adj qualifier); + # duration-related(adj qualifier); + +toki: + communicate(v) about(prep) [object]; + # say(v) [object]; + think(v) about(prep) [object]; + + conversation(n); + story(n); + + language(n); + + communicating(adj qualifier); + # saying(adj qualifier); + thinking(adj qualifier); + + conversation-related(adj qualifier); + story-related(adj qualifier); + language-related(adj qualifier); + +tomo: + indoor(adj origin) space(n); + # shelter(n); + # room(n); + building(n); + # home(n); + # tent(n); + # shack(n); + + # shelter-related(adj qualifier); + # room-related(adj qualifier); + building-related(adj qualifier); + # home-related(adj qualifier); + # tent-related(adj qualifier); + # shack-related(adj qualifier); + +tonsi: + non-binary(adj qualifier) person(n); + gender nonconforming(adj qualifier) person(n); + genderqueer(adj qualifier) person(n); + transgender(adj qualifier) person(n); + + non-binary(adj qualifier); + gender nonconforming(adj qualifier); + genderqueer(adj qualifier); + transgender(adj qualifier); + +tu: + 2(num); + separate(v) [object]; + divide(v) [object]; + split(v) [object]; + +unpa: + sex(n); + have(v) sex(n singular) with(prep) [object]; + + sexual(adj qualifier); + +uta: + mouth(n); + lip(n); + throat(n); + consuming(adj qualifier) orifice(n); + + mouth-related(adj qualifier); + lips-related(adj qualifier); + throat-related(adj qualifier); + +utala: + # fight(v) against(prep) [object]; + compete(v) against(prep) [object]; + # battle(v) against(prep) [object]; + + competition(n); + challenge(n); + + struggle(v); + strive(v); + + # fight-related(adj qualifier); + competition-related(adj qualifier); + challenge-related(adj qualifier); + +walo: + light-colored(adj color); + # white(adj color); + # pale(adj color); + # light(adv) gray(adj 
color); + # cream-colored(adj color); + + light(adj color) color(n); + # white(n); + # paleness(n); + # light(adj color) gray(n); + # cream(adj qualifier) color(n); + +wan: + 1(num); + singular(adj opinion); + + combine(v) [object]; + # join(v) [object]; + # mix(v) [object]; + # fuse(v) [object]; + +waso: + bird(n); + flying(adj qualifier) creature(n); + winged(adj qualifier) animal(n); + + fly(v); + + bird-like(adj physical quality); + bird-related(adj qualifier); + +wawa: + power(n); + # energy(n); + # strength(n); + + confident(adj opinion); + intense(adj opinion); + # forceful(adj opinion); + + amazing(adj opinion); + # impressive(adj opinion); + + powerfully(adv); + +weka: + absent(adj origin); + away(adj origin); + distant(adj origin); + + remove(v) [object]; + # TODO: get rid of + + absence(n); + removal(n); + +wile: + want(v) [object]; + # desire(v) [object]; + # wish(v) for(prep) [object]; + require(v) [object]; + + want(n); + # desire(n); + # wish(n); + requirement(n); + + want(v) to(particle) [predicate v]; + + want-related(adj qualifier); + # desire-related(adj qualifier); + # wish-related(adj qualifier); + requirement-related(adj qualifier); diff --git a/dictionary/type.ts b/dictionary/type.ts new file mode 100644 index 0000000..d457747 --- /dev/null +++ b/dictionary/type.ts @@ -0,0 +1,89 @@ +export type Noun = { + determiner: Array; + adjective: Array; + singular: null | string; + plural: null | string; + gerund: boolean; + postAdjective: null | { + adjective: string; + name: string; + }; +}; +export type DeterminerType = + | "article" + | "demonstrative" + | "distributive" + | "interrogative" + | "possessive" + | "quantifier" + | "negative"; +export type Quantity = "singular" | "plural" | "both"; +export type Determiner = { + determiner: string; + plural: null | string; + kind: DeterminerType; + number: Quantity; +}; +export type AdjectiveType = + | "opinion" + | "size" + | "physical quality" + | "age" + | "color" + | "origin" + | "material" + 
| "qualifier"; +export type Adjective = { + adverb: Array; + adjective: string; + kind: AdjectiveType; +}; +export type Definition = + | { type: "filler"; before: string; repeat: string; after: string } + | { type: "particle definition"; definition: string } + | ({ type: "noun" } & Noun) + | { + type: "noun preposition"; + noun: Noun; + preposition: string; + } + | { + type: "personal pronoun"; + singular: null | { subject: string; object: string }; + plural: null | { subject: string; object: string }; + } + | ({ type: "determiner" } & Determiner) + | { type: "numeral"; numeral: number } + | ({ type: "adjective" } & Adjective) + | { type: "compound adjective"; adjective: Array } + | { type: "adverb"; adverb: string } + | { + type: "verb"; + presentSingular: string; + presentPlural: string; + past: string; + directObject: null | Noun; + indirectObject: Array<{ + preposition: string; + object: Noun; + }>; + forObject: boolean | string; + } + | { + type: "preverb as linking verb"; + linkingVerb: string; + } + | { + type: "preverb as finite verb"; + presentSingular: string; + presentPlural: string; + past: string; + particle: null | string; + } + | { + type: "preverb as modal verb"; + verb: string; + } + | { type: "preposition"; preposition: string } + | { type: "interjection"; interjection: string }; +export type Dictionary = { [word: string]: Array }; diff --git a/dist/images/16.png b/dist/images/16.png new file mode 100644 index 0000000..e43df2a Binary files /dev/null and b/dist/images/16.png differ diff --git a/dist/images/180.png b/dist/images/180.png new file mode 100644 index 0000000..5e75cba Binary files /dev/null and b/dist/images/180.png differ diff --git a/dist/images/192.png b/dist/images/192.png new file mode 100644 index 0000000..53ef806 Binary files /dev/null and b/dist/images/192.png differ diff --git a/dist/images/32.png b/dist/images/32.png new file mode 100644 index 0000000..6e0ecdd Binary files /dev/null and b/dist/images/32.png differ diff --git 
a/dist/images/48.png b/dist/images/48.png new file mode 100644 index 0000000..6a6e7b0 Binary files /dev/null and b/dist/images/48.png differ diff --git a/dist/images/ico.ico b/dist/images/ico.ico new file mode 100644 index 0000000..31a9e41 Binary files /dev/null and b/dist/images/ico.ico differ diff --git a/dist/images/preview.png b/dist/images/preview.png new file mode 100644 index 0000000..f2f9fcb Binary files /dev/null and b/dist/images/preview.png differ diff --git a/dist/index.html b/dist/index.html new file mode 100644 index 0000000..7c92f50 --- /dev/null +++ b/dist/index.html @@ -0,0 +1,127 @@ + + + + + ilo Token + + + + + + + + + + + + + + + + + +

ilo Token

+

+ ⚠ WARNING: Work in progress; Some things may not work + properly. +

+

+ An open-source rule-based Toki Pona to English translator. No data are + collected. + Limitations. +

+ +
+ + +
+ +
    +

    +
      +
      + Provide feedback + +
      +
      +

      + +

      +
      + +

      + Settings + (Help) +

      + + + + + + +
      + +
      + + +
      +
      + + diff --git a/dist/style.css b/dist/style.css new file mode 100644 index 0000000..03442f2 --- /dev/null +++ b/dist/style.css @@ -0,0 +1,107 @@ +@font-face { + font-family: nasin-nanpa; + src: url("https://github.com/ETBCOR/nasin-nanpa/raw/main/versions/nasin-nanpa.otf"); +} +body { + margin: 10px; + font-family: sans-serif, nasin-nanpa; +} +a { + color: #0057af; +} +a:visited { + color: #551a8b; +} +#input { + box-sizing: border-box; + resize: none; + width: 100%; + + background-color: inherit; + color: inherit; + padding: 5px; + + border-color: gray; + border-width: 2px; + border-radius: 2px; +} +#error, +#error-list { + color: #b60000; +} +.align-right { + text-align: right; +} +summary { + cursor: pointer; +} +button, +select { + padding: 10px; +} +dialog { + padding: 0px; + padding-top: 20px; + padding-bottom: 20px; + flex-direction: column; + resize: both; +} +dialog:modal { + display: flex; +} +dialog > label, +dialog > h1 { + padding: 10px; + margin: 0; +} +dialog > label[for] { + padding-bottom: 5px; +} +dialog > select { + margin: 10px; + margin-top: 0px; +} +dialog > div { + margin: 10px; + display: flex; +} +dialog > div > div { + flex-grow: 1; +} +dialog input[type="checkbox"] { + margin-left: 10px; + width: 1em; + height: 1em; + float: right; +} +@media (min-width: 800px) { + body { + margin: 50px; + } + dialog { + padding: 20px; + } +} +@media (prefers-color-scheme: dark) { + body { + background-color: black; + color: white; + } + dialog { + background-color: black; + color: white; + } + a { + color: #3197ff; + } + a:visited { + color: #b47de7; + } + #error, + #error-list { + color: #ff5e5e; + } +} +.hide { + display: none; +} diff --git a/index.html b/index.html deleted file mode 100644 index 099d640..0000000 --- a/index.html +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - ilo Token - - -

      ilo Token

      -

      - ⚠ WARNING: Work in progress; Some things may not work - properly. -

      -

      - A rule-based Toki Pona to English translator. - Limitations. -

      - -
      - - -
      -
        -

        -
        - Provide feedback - -
        -
        -

        - -

        -
        - - - diff --git a/nimi-ku-suli.txt b/nimi-ku-suli.txt deleted file mode 100644 index e829fb1..0000000 --- a/nimi-ku-suli.txt +++ /dev/null @@ -1,138 +0,0 @@ -a -akesi -ala -alasa -ale -ali -anpa -ante -anu -awen -e -en -epiku -esun -ijo -ike -ilo -insa -jaki -jan -jasima -jelo -jo -kala -kalama -kama -kasi -ken -kepeken -kijetesantakalu -kili -kin -kipisi -kiwen -ko -kokosila -kon -ku -kule -kulupu -kute -la -lanpan -lape -laso -lawa -leko -len -lete -li -lili -linja -lipu -loje -lon -luka -lukin -lupa -ma -mama -mani -meli -meso -mi -mije -misikeke -moku -moli -monsi -monsuta -mu -mun -musi -mute -n -namako -nanpa -nasa -nasin -nena -ni -nimi -noka -o -oko -olin -ona -open -pakala -pali -palisa -pan -pana -pi -pilin -pimeja -pini -pipi -poka -poki -pona -pu -sama -seli -selo -seme -sewi -sijelo -sike -sin -sina -sinpin -sitelen -soko -sona -soweli -suli -suno -supa -suwi -tan -taso -tawa -telo -tenpo -toki -tomo -tonsi -tu -unpa -uta -utala -walo -wan -waso -wawa -weka -wile diff --git a/src/ast-parser.ts b/src/ast-parser.ts new file mode 100644 index 0000000..0418e87 --- /dev/null +++ b/src/ast-parser.ts @@ -0,0 +1,861 @@ +/** Module for AST Parser. It is responsible for turning an array of token tree into AST. 
*/ + +import { + Clause, + Emphasis, + everyWordUnitInFullClause, + FullClause, + HeadedWordUnit, + Modifier, + MultiplePhrases, + MultiplePredicates, + MultipleSentences, + Phrase, + Preposition, + Sentence, + SimpleHeadedWordUnit, + SimpleWordUnit, + WordUnit, +} from "./ast.ts"; +import { UnexpectedError, UnrecognizedError } from "./error.ts"; +import { Output } from "./output.ts"; +import { + CLAUSE_RULE, + filter, + FULL_CLAUSE_RULE, + MODIFIER_RULES, + MULTIPLE_MODIFIERS_RULES, + MULTIPLE_SENTENCES_RULE, + PHRASE_RULE, + PREPOSITION_RULE, + SENTENCE_RULE, + WORD_UNIT_RULES, +} from "./filter.ts"; +import { + allAtLeastOnce, + choice, + choiceOnlyOne, + count, + eol, + lazy, + many, + manyAtLeastOnce, + optional, + Parser, + sequence, +} from "./parser-lib.ts"; +import { describe, Token } from "./token.ts"; +import { DICTIONARY } from "dictionary/dictionary.ts"; +import { spaces, TOKEN } from "./lexer.ts"; + +const CONTENT_WORD = new Set( + Object + .entries(DICTIONARY) + .filter(([_, definitions]) => + definitions + .some((definition) => + definition.type !== "filler" && + definition.type !== "particle definition" + ) + ) + .map(([word]) => word), +); +const PREPOSITION = new Set( + Object + .entries(DICTIONARY) + .filter(([_, definitions]) => + definitions.some((definition) => definition.type === "preposition") + ) + .map(([word]) => word), +); +const PREVERB = new Set( + Object + .entries(DICTIONARY) + .filter(([_, definitions]) => + definitions.some((definition) => + definition.type === "preverb as finite verb" || + definition.type === "preverb as linking verb" || + definition.type === "preverb as modal verb" + ) + ) + .map(([word]) => word), +); +const TOKI_PONA_WORD = new Set(Object.keys(DICTIONARY)); + +/** Parses a specific type of token. 
*/ +function specificToken( + type: T, +): Parser { + return TOKEN.map((token) => { + if (token.type === type) { + return token as Token & { type: T }; + } else { + throw new UnexpectedError(describe(token), type); + } + }); +} +/** Parses a comma. */ +function comma(): Parser { + return specificToken("punctuation") + .map(({ punctuation }) => punctuation) + .filter((punctuation) => punctuation === ","); +} +/** Parses an optional comma. */ +function optionalComma(): Parser { + return optional(comma()); +} +/** Parses a Toki Pona word. */ +function word(): Parser { + return specificToken("word").map(({ word }) => word); +} +/** Parses proper words spanning multiple words. */ +function properWords(): Parser { + return specificToken("proper word").map(({ words }) => words); +} +/** Parses any punctuation token. */ +function punctuation(): Parser { + return specificToken("punctuation").map(({ punctuation }) => punctuation); +} +/** Parses a word only if it is in `set`; throws `UnrecognizedError` naming `description` otherwise. */ +function wordFrom(set: Set, description: string): Parser { + return word().filter((word) => { + if (set.has(word)) { + return true; + } else { + throw new UnrecognizedError(`"${word}" as ${description}`); + } + }); +} +/** Parses a specific word; throws `UnexpectedError` for any other word. */ +function specificWord(thatWord: string): Parser { + return word().filter((thisWord) => { + if (thatWord === thisWord) return true; + else throw new UnexpectedError(`"${thisWord}"`, `"${thatWord}"`); + }); +} +/** Parses an emphasis particle.
*/ +function emphasis(): Parser { + return choice( + specificToken("space long glyph") + .map((longGlyph) => { + if (longGlyph.words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words: longGlyph.words }), + '"a" or "n"', + ); + } + const word = longGlyph.words[0]; + if (word !== "n" && word !== "a") { + throw new UnexpectedError(`"${word}"`, '"a" or "n"'); + } + return { + type: "long word", + word, + length: longGlyph.spaceLength, + } as Emphasis; + }), + specificToken("multiple a") + .map(({ count }) => ({ type: "multiple a", count }) as Emphasis), + specificToken("long word") + .map(({ word, length }) => + ({ type: "long word", word, length }) as Emphasis + ), + wordFrom(new Set(["a", "n"]), "a/n") + .map((word) => ({ type: "word", word }) as Emphasis), + ); +} +/** Parses an optional emphasis particle. */ +function optionalEmphasis(): Parser { + return optional(emphasis()); +} +/** Parses an X ala X construction, either as an "x ala x" token or spelled inside a long glyph. */ +function xAlaX( + word: Set, + description: string, +): Parser { + return choice( + sequence( + specificToken("headless long glyph start"), + wordFrom(CONTENT_WORD, "content word"), + specificToken("inside long glyph") + .filter((words) => { + if (words.words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words: words.words }), + '"ala"', + ); + } + if (words.words[0] !== "ala") { + throw new UnexpectedError(`"${words.words[0]}"`, '"ala"'); + } + return true; + }), + wordFrom(CONTENT_WORD, "content word"), + specificToken("headless long glyph end"), + ) + .map(([_, left, _1, right]) => { + if (!word.has(left)) { + throw new UnrecognizedError(`"${left}" as ${description}`); + } else if (left !== right) { + throw new UnexpectedError(`"${right}"`, `"${left}"`); + } else { + return { type: "x ala x", word: left } as WordUnit & { + type: "x ala x"; + }; + } + }), + specificToken("x ala x") + .map(({ word }) => + ({ type: "x ala x", word }) as WordUnit & { type: "x ala x" } + ), + ); +} +/** Parses a simple word unit: a reduplication, an X ala X construction, or a plain word. */ +function simpleWordUnit( + word: Set, +
description: string, +): Parser { + return choice( + sequence( + wordFrom(word, description) + .then((word) => + count(manyAtLeastOnce(specificWord(word))) + .map((count) => [word, count + 1] as [string, number]) + ), + ) + .map(([[word, count]]) => + ({ + type: "reduplication", + word, + count, + }) as SimpleHeadedWordUnit + ), + xAlaX(word, description), + wordFrom(word, description) + .map((word) => ({ type: "default", word }) as SimpleHeadedWordUnit), + ); +} +/** Parses word unit except numbers. */ +function wordUnit( + word: Set, + description: string, +): Parser { + return sequence( + simpleWordUnit(word, description), + optionalEmphasis(), + ) + .map(([wordUnit, emphasis]) => ({ + ...wordUnit, + emphasis, + })) + .filter(filter(WORD_UNIT_RULES)); +} +/** Parses a binary combined glyphs. */ +function binaryWords( + word: Set, + description: string, +): Parser<[string, string]> { + return specificToken("combined glyphs").map(({ words }) => { + if (words.length > 2) { + throw new UnrecognizedError(`combined glyphs of ${words.length} words`); + } else if (!word.has(words[0])) { + throw new UnrecognizedError(`"${words[0]}" as ${description}`); + } else if (!CONTENT_WORD.has(words[1])) { + throw new UnrecognizedError(`"${words[1]}" as content word`); + } else { + return words as [string, string]; + } + }); +} +/** Parses a word unit or a combined glyphs. 
*/ +function optionalCombined( + word: Set, + description: string, +): Parser<[WordUnit, Array]> { + return choice( + wordUnit(word, description) + .map((wordUnit) => [wordUnit, []] as [WordUnit, Array]), + binaryWords(word, description) + .map(([first, second]) => + [ + { type: "default", word: first, emphasis: null }, + [{ + type: "default", + word: { type: "default", word: second, emphasis: null }, + }], + ] as [WordUnit, Array] + ), + ); +} +/** Looks up the value of the first "numeral" definition of `word` in the dictionary. */ +function wordToNumber(word: string): number { + return DICTIONARY[word] + .filter((definition) => definition.type === "numeral")[0] + .numeral; +} +/** Parses number words in order other than "ale" and "ala". This can parse + * nothing and return 0. + */ +function subAleNumber(): Parser { + return sequence( + many(specificWord("mute")), + many(specificWord("luka")), + many(specificWord("tu")), + many(specificWord("wan")), + ) + .map((array) => array.flat()) + .map((array) => + array.reduce((number, word) => number + wordToNumber(word), 0) + ); +} +/** Parses "ale" or "ali". */ +function ale(): Parser { + return choice(specificWord("ale"), specificWord("ali")); +} +/** Parses number words including "nasin nanpa pona". */ +function number(): Parser { + return choice( + specificWord("ala").map(() => 0), + sequence( + manyAtLeastOnce( + sequence( + subAleNumber().filter((number) => number !== 0), + count(manyAtLeastOnce(ale())), + ), + ), + subAleNumber(), + ) + .map(([rest, last]) => + [...rest, [last, 0]].reduce( + (result, [sub, ale]) => result + sub * 100 ** ale, + 0, + ) + ), + sequence( + count(many(ale())), + subAleNumber(), + ) + .map(([ale, sub]) => ale * 100 + sub) + .filter((number) => number !== 0), + ); +} +/** Parses a "pi" construction.
*/ +function pi(): Parser { + return choice( + sequence( + specificToken("headed long glyph start") + .filter((words) => { + if (words.words.length !== 1) { + throw new UnexpectedError( + describe({ type: "combined glyphs", words: words.words }), + "pi", + ); + } + if (words.words[0] !== "pi") { + throw new UnexpectedError(`"${words.words[0]}"`, "pi"); + } + return true; + }), + phrase(), + specificToken("headless long glyph end"), + ) + .map(([_, phrase]) => phrase), + specificWord("pi").with(phrase()), + ) + .map((phrase) => ({ type: "pi", phrase }) as Modifier & { type: "pi" }) + .filter(filter(MODIFIER_RULES)); +} +/** Parses multiple modifiers. */ +function modifiers(): Parser> { + return sequence( + many( + choice( + sequence(number(), optionalEmphasis()) + .map(([number, emphasis]) => + ({ + type: "default", + word: { type: "number", number, emphasis }, + }) as Modifier + ) + .filter(filter(MODIFIER_RULES)), + wordUnit(CONTENT_WORD, "modifier") + .map((word) => ({ type: "default", word }) as Modifier) + .filter(filter(MODIFIER_RULES)), + properWords() + .map((words) => ({ type: "proper words", words }) as Modifier) + .filter(filter(MODIFIER_RULES)), + ), + ), + many( + sequence(wordUnit(new Set(["nanpa"]), '"nanpa"'), phrase()) + .map(([nanpa, phrase]) => + ({ type: "nanpa", nanpa, phrase }) as Modifier + ) + .filter(filter(MODIFIER_RULES)), + ), + many(pi()), + ) + .sortBy(([_, nanpaModifiers, _1]) => -nanpaModifiers.length) + .map(([modifiers, nanpaModifiers, piModifiers]) => [ + ...modifiers, + ...nanpaModifiers, + ...piModifiers, + ]) + .filter(filter(MULTIPLE_MODIFIERS_RULES)); +} +/** Parses phrases. 
*/ +function phrase_(): Parser { + return choice( + sequence( + number(), + optionalEmphasis(), + modifiers(), + optionalEmphasis(), + ) + .map(([number, wordModifier, modifiers, phraseModifier]) => + ({ + type: "default", + headWord: { type: "number", number, emphasis: wordModifier }, + modifiers, + emphasis: phraseModifier, + }) as Phrase + ), + binaryWords(PREVERB, "preverb").map(([preverb, phrase]) => + ({ + type: "preverb", + preverb: { type: "default", word: preverb, emphasis: null }, + modifiers: [], + phrase: { + type: "default", + headWord: { type: "default", word: phrase, emphasis: null }, + modifiers: [], + emphasis: null, + }, + emphasis: null, + }) as Phrase + ), + sequence( + optionalCombined(PREVERB, "preverb"), + modifiers(), + phrase(), + optionalEmphasis(), + ) + .map(([[preverb, modifier], modifiers, phrase, emphasis]) => + ({ + type: "preverb", + preverb, + modifiers: [...modifier, ...modifiers], + phrase, + emphasis, + }) as Phrase + ), + preposition() + .map((preposition) => + ({ ...preposition, type: "preposition" }) as Phrase + ), + sequence( + optionalCombined(CONTENT_WORD, "content word"), + modifiers(), + optionalEmphasis(), + ) + .map(([[headWord, modifier], modifiers, emphasis]) => + ({ + type: "default", + headWord, + modifiers: [...modifier, ...modifiers], + emphasis, + }) as Phrase + ), + ) + .filter(filter(PHRASE_RULE)); +} +/** Wraps `phrase_` in `lazy`; this is the entry point other parsers use to refer to phrases. */ +function phrase(): Parser { + return lazy(phrase_); +} +/** + * Parses nested phrases with given nesting rule, only accepting the top level + * operation.
+ */ +function nestedPhrasesOnly( + nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, +): Parser { + if (nestingRule.length === 0) { + return phrase() + .map((phrase) => ({ type: "single", phrase }) as MultiplePhrases); + } else { + const [first, ...rest] = nestingRule; + let type: "and conjunction" | "anu"; + if (["en", "li", "o", "e"].includes(first)) { + type = "and conjunction"; + } else { + type = "anu"; + } + return sequence( + nestedPhrases(rest), + manyAtLeastOnce( + optionalComma() + .with(specificWord(first)) + .with(nestedPhrases(rest)), + ), + ) + .map(([group, moreGroups]) => ({ + type, + phrases: [group, ...moreGroups], + })); + } +} +/** Parses nested phrases with given nesting rule. */ +function nestedPhrases( + nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, +): Parser { + if (nestingRule.length === 0) { + return phrase() + .map((phrase) => ({ type: "single", phrase }) as MultiplePhrases); + } else { + return choice( + nestedPhrasesOnly(nestingRule), + nestedPhrases(nestingRule.slice(1)), + ); + } +} +/** Parses phrases separated by "en" or "anu". */ +function subjectPhrases(): Parser { + return choice( + nestedPhrasesOnly(["en", "anu"]), + nestedPhrasesOnly(["anu", "en"]), + phrase().map((phrase) => ({ type: "single", phrase })), + ); +} +/** Parses prepositional phrase. 
*/ +function preposition(): Parser { + return choice( + sequence( + specificToken("headless long glyph start"), + phrase(), + specificToken("headless long glyph end"), + ) + .map(([_, phrase]) => + ({ + preposition: { + type: "default", + word: "lon", + emphasis: null, + }, + modifiers: [], + phrases: { type: "single", phrase }, + emphasis: null, + }) as Preposition + ), + sequence( + specificToken("headed long glyph start") + .map((words) => { + if (words.words.length > 2) { + throw new UnrecognizedError( + `combined glyphs of ${words.words.length} words`, + ); + } + const word = words.words[0]; + if (!PREPOSITION.has(word)) { + throw new UnrecognizedError(`"${word}" as preposition`); + } + return words.words; + }), + phrase(), + specificToken("headless long glyph end"), + ) + .map(([words, phrase]) => { + const modifiers = words + .slice(1) + .map((word) => + ({ type: "default", word: { type: "default", word, emphasis: null } }) as Modifier + ); + return { + preposition: { type: "default", word: words[0], emphasis: null }, + modifiers, + phrases: { type: "single", phrase }, + emphasis: null, + } as Preposition; + }), + binaryWords(PREPOSITION, "preposition").map(([preposition, phrase]) => + ({ + preposition: { + type: "default", + word: preposition, + emphasis: null, + }, + modifiers: [], + phrases: { + type: "single", + phrase: { + type: "default", + headWord: { + type: "default", + word: phrase, + emphasis: null, + }, + modifiers: [], + emphasis: null, + }, + }, + emphasis: null, + }) as Preposition + ), + sequence( + optionalCombined(PREPOSITION, "preposition"), + modifiers(), + nestedPhrases(["anu"]), + optionalEmphasis(), + ) + .map(([[preposition, modifier], modifiers, phrases, emphasis]) => + ({ + preposition, + modifiers: [...modifier, ...modifiers], + phrases, + emphasis, + }) as Preposition + ), + ) + .filter(filter(PREPOSITION_RULE)); +} +/** + * Parses associated predicates whose predicates only uses top level operator.
+ */ +function associatedPredicates( + nestingRule: Array<"li" | "o" | "anu">, +): Parser { + return sequence( + nestedPhrasesOnly(nestingRule), + optional( + optionalComma() + .with(specificWord("e")) + .with(nestedPhrases(["e", "anu"])), + ), + many(optionalComma().with(preposition())), + ) + .filter(([_, objects, prepositions]) => + objects != null || prepositions.length > 0 + ) + .sortBy(([_, _1, prepositions]) => -prepositions.length) + .map(([predicates, objects, prepositions]) => ({ + type: "associated", + predicates, + objects, + prepositions, + })); +} +/** Parses multiple predicates without "li" nor "o" at the beginning. */ +function multiplePredicates( + nestingRule: Array<"li" | "o" | "anu">, +): Parser { + if (nestingRule.length === 0) { + return choice( + associatedPredicates([]), + phrase().map((predicate) => + ({ type: "single", predicate }) as MultiplePredicates + ), + ); + } else { + const [first, ...rest] = nestingRule; + let type: "and conjunction" | "anu"; + if (first === "li" || first === "o") { + type = "and conjunction"; + } else { + type = "anu"; + } + return choice( + associatedPredicates(nestingRule), + sequence( + choice( + associatedPredicates(nestingRule), + multiplePredicates(rest), + ), + manyAtLeastOnce( + optionalComma() + .with(specificWord(first)) + .with( + choice( + associatedPredicates(nestingRule), + multiplePredicates(rest), + ), + ), + ), + ) + .map(([group, moreGroups]) => + ({ type, predicates: [group, ...moreGroups] }) as MultiplePredicates + ), + multiplePredicates(rest), + ); + } +} +/** Parses a single clause. 
*/ +function clause(): Parser { + return choice( + sequence( + wordFrom(new Set(["mi", "sina"]), "mi/sina subject"), + multiplePredicates(["li", "anu"]), + ) + .map(([subject, predicates]) => + ({ + type: "li clause", + subjects: { + type: "single", + phrase: { + type: "default", + headWord: { + type: "default", + word: subject, + emphasis: null, + }, + alaQuestion: false, + modifiers: [], + emphasis: null, + }, + }, + predicates, + explicitLi: false, + }) as Clause + ), + sequence( + preposition(), + many(optionalComma().with(preposition())), + ) + .map(([preposition, morePreposition]) => + ({ + type: "prepositions", + prepositions: [preposition, ...morePreposition], + }) as Clause + ), + subjectPhrases() + .filter((phrases) => + phrases.type !== "single" || phrases.phrase.type !== "quotation" + ) + .map((phrases) => ({ type: "phrases", phrases }) as Clause), + subjectPhrases() + .skip(specificWord("o")) + .map((phrases) => ({ type: "o vocative", phrases }) as Clause), + sequence( + subjectPhrases(), + optionalComma() + .with(specificWord("li")) + .with(multiplePredicates(["li", "anu"])), + ) + .map(([subjects, predicates]) => + ({ + type: "li clause", + subjects, + predicates, + explicitLi: true, + }) as Clause + ), + specificWord("o") + .with(multiplePredicates(["o", "anu"])) + .map((predicates) => + ({ type: "o clause", subjects: null, predicates }) as Clause + ), + sequence( + subjectPhrases(), + optionalComma() + .with(specificWord("o")) + .with(multiplePredicates(["o", "anu"])), + ) + .map(([subjects, predicates]) => + ({ type: "o clause", subjects, predicates }) as Clause + ), + ) + .filter(filter(CLAUSE_RULE)); +} +/** Parses a single clause including preclause and postclause. 
*/ +function fullClause(): Parser { + return choice( + sequence( + optional(emphasis().skip(optionalComma())), + optional( + wordUnit(new Set(["kin", "taso"]), "taso/kin").skip(optionalComma()), + ), + clause(), + optional( + optionalComma() + .with(specificWord("anu")) + .with(wordUnit(new Set(["seme"]), '"seme"')), + ), + optional(optionalComma().with(emphasis())), + ) + .map(([startingParticle, kinOrTaso, clause, anuSeme, endingParticle]) => + ({ + type: "default", + startingParticle, + kinOrTaso, + clause, + anuSeme, + endingParticle, + }) as FullClause + ) + .sort((clause) => { + if ((clause as FullClause & { type: "default" }).anuSeme == null) { + return 1; + } else { + return 0; + } + }), + emphasis() + .map((emphasis) => ({ type: "filler", emphasis }) as FullClause), + ) + .filter(filter(FULL_CLAUSE_RULE)); +} +/** Parses "la" with an optional comma before or after it. */ +function la(): Parser { + return choice( + comma().with(specificWord("la")), + specificWord("la").skip(comma()), + specificWord("la"), + ); +} +/** Parses a single full sentence with optional punctuation, tagging it as interrogative when it contains "X ala X" or "seme". */ +function sentence(): Parser { + return sequence( + many(fullClause().skip(la())), + fullClause(), + choice( + eol().map(() => ""), + punctuation(), + ), + ) + .map(([laClauses, finalClause, punctuation]) => { + const wordUnits = [...laClauses, finalClause] + .flatMap(everyWordUnitInFullClause); + let interrogative = null; + if (wordUnits.some((wordUnit) => wordUnit.type === "x ala x")) { + interrogative = "x ala x" as const; + } else if ( + wordUnits.some((wordUnit) => + (wordUnit.type === "default" || wordUnit.type === "reduplication") && + wordUnit.word === "seme" + ) + ) { + interrogative = "seme" as const; + } + return { + laClauses, + finalClause, + interrogative, + punctuation, + }; + }) + .filter(filter(SENTENCE_RULE)); +} +/** A multiple sentence parser for final parser.
*/ +const FULL_PARSER = spaces() + .with(choiceOnlyOne( + wordFrom(TOKI_PONA_WORD, "Toki Pona word") + .skip(eol()) + .map((word) => ({ type: "single word", word }) as MultipleSentences), + allAtLeastOnce(sentence()) + .skip(eol()) + .filter(filter(MULTIPLE_SENTENCES_RULE)) + .map((sentences) => + ({ type: "sentences", sentences }) as MultipleSentences + ), + )); +/** Turns string into Toki Pona AST. */ +export function parse(src: string): Output { + if (/\n/.test(src.trim())) { + return new Output(new UnrecognizedError("multiline text")); + } + return FULL_PARSER.parse(src); +} diff --git a/src/ast.ts b/src/ast.ts index 3c411f6..a3a5305 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,16 +1,33 @@ -/** Represents a word unit. */ -export type WordUnit = +/** Module for describing Toki Pona AST. */ + +import { nullableAsArray } from "./misc.ts"; + +/** Represents an emphasis particle. */ +export type Emphasis = + | { type: "word"; word: string } + | { type: "long word"; word: string; length: number } + | { type: "multiple a"; count: number }; +export type SimpleHeadedWordUnit = | { type: "default"; word: string } | { type: "x ala x"; word: string } - | { type: "reduplication"; word: string; count: number } - | { type: "numbers"; numbers: Array }; + | { type: "reduplication"; word: string; count: number }; +export type SimpleWordUnit = + | SimpleHeadedWordUnit + | { type: "number"; number: number }; +export type HeadedWordUnit = + & SimpleHeadedWordUnit + & { emphasis: null | Emphasis }; +/** Represents a word unit. */ +export type WordUnit = + & SimpleWordUnit + & { emphasis: null | Emphasis }; /** Represents a single modifier. 
*/ export type Modifier = | { type: "default"; word: WordUnit } | { type: "proper words"; words: string } | { type: "pi"; phrase: Phrase } | { type: "nanpa"; nanpa: WordUnit; phrase: Phrase } - | { type: "quotation"; quotation: Quotation }; + | ({ type: "quotation" } & Quotation); /** * Represents a phrase including preverbial phrases, quotations, and * prepositional phrases intended for predicate. @@ -20,35 +37,28 @@ export type Phrase = type: "default"; headWord: WordUnit; modifiers: Array; + emphasis: null | Emphasis; } | { type: "preverb"; - preverb: WordUnit; + preverb: HeadedWordUnit; modifiers: Array; phrase: Phrase; + emphasis: null | Emphasis; } - | { - type: "preposition"; - preposition: Preposition; - } - | { - type: "quotation"; - quotation: Quotation; - }; -/** Represents multiple phrases separated by repeated particle or _anu_. */ + | ({ type: "preposition" } & Preposition) + | ({ type: "quotation" } & Quotation); +/** Represents multiple phrases separated by repeated particle or "anu". */ export type MultiplePhrases = | { type: "single"; phrase: Phrase } - | { - type: "and conjunction"; - phrases: Array; - } + | { type: "and conjunction"; phrases: Array } | { type: "anu"; phrases: Array }; /** Represents a single prepositional phrase. */ export type Preposition = { - preposition: WordUnit; + preposition: HeadedWordUnit; modifiers: Array; - /** This cannot be an "and conjunction": only "anu" or "single". */ - phrases: MultiplePhrases; + phrases: MultiplePhrases & { type: "single" | "anu" }; + emphasis: null | Emphasis; }; /** Represents multiple predicates. */ export type MultiplePredicates = @@ -64,112 +74,152 @@ export type MultiplePredicates = /** Represents a simple clause. 
*/ export type Clause = | { type: "phrases"; phrases: MultiplePhrases } - | { - type: "o vocative"; - phrases: MultiplePhrases; - } + | { type: "o vocative"; phrases: MultiplePhrases } | { type: "li clause"; subjects: MultiplePhrases; predicates: MultiplePredicates; + explicitLi: boolean; } | { type: "o clause"; subjects: null | MultiplePhrases; predicates: MultiplePredicates; } + | { type: "prepositions"; prepositions: Array } + | ({ type: "quotation" } & Quotation); +/** Represents a clause including preclauses and postclauses. */ +export type FullClause = | { - type: "prepositions"; - prepositions: Array; + type: "default"; + startingParticle: null | Emphasis; + kinOrTaso: null | HeadedWordUnit; + clause: Clause; + anuSeme: null | HeadedWordUnit; + endingParticle: null | Emphasis; } - | { - type: "quotation"; - quotation: Quotation; - }; -/** Represents a clause including preclause and postclause. */ -export type FullClause = { - taso: null | WordUnit; - anuSeme: null | WordUnit; - clause: Clause; -}; + | { type: "filler"; emphasis: Emphasis }; /** Represents a single full sentence. */ -export type Sentence = { laClauses: Array; punctuation: string }; +export type Sentence = { + laClauses: Array; + finalClause: FullClause; + interrogative: null | "seme" | "x ala x"; + punctuation: string; +}; /** Represents quotation. 
*/ export type Quotation = { sentences: Array; leftMark: string; rightMark: string; }; -export function someModifierInPhrase( - phrase: Phrase, - whenQuotation: boolean, - checker: (modifier: Modifier) => boolean, -): boolean { - if (phrase.type === "default") { - return phrase.modifiers.some(checker); - } else if (phrase.type === "preverb") { - return phrase.modifiers.some(checker) || - someModifierInPhrase(phrase.phrase, whenQuotation, checker); - } else if (phrase.type === "preposition") { - const preposition = phrase.preposition; - return preposition.modifiers.some(checker) || - someModifierInMultiplePhrases( - preposition.phrases, - whenQuotation, - checker, - ); - } else if (phrase.type === "quotation") { - return whenQuotation; - } else { - throw new Error("unreachable"); +/** The final representation of whole Toki Pona input text. */ +export type MultipleSentences = + | { type: "single word"; word: string } + | { type: "sentences"; sentences: Array }; + +function everyWordUnitInModifier(modifier: Modifier): Array { + switch (modifier.type) { + case "default": + return [modifier.word]; + case "pi": + return everyWordUnitInPhrase(modifier.phrase); + case "nanpa": + return [modifier.nanpa, ...everyWordUnitInPhrase(modifier.phrase)]; + case "quotation": + case "proper words": + return []; } } -export function someModifierInMultiplePhrases( - phrases: MultiplePhrases, - whenQuotation: boolean, - checker: (modifier: Modifier) => boolean, -): boolean { - if (phrases.type === "single") { - return someModifierInPhrase(phrases.phrase, whenQuotation, checker); - } else if (phrases.type === "and conjunction" || phrases.type === "anu") { - return phrases.phrases.some((phrases) => - someModifierInMultiplePhrases(phrases, whenQuotation, checker) - ); - } else { - throw new Error("unreachable"); +function everyWordUnitInPhrase(phrase: Phrase): Array { + switch (phrase.type) { + case "default": + return [ + phrase.headWord, + 
...phrase.modifiers.flatMap(everyWordUnitInModifier), + ]; + case "preverb": + return [ + phrase.preverb, + ...phrase.modifiers.flatMap(everyWordUnitInModifier), + ...everyWordUnitInPhrase(phrase.phrase), + ]; + case "preposition": + return everyWordUnitInPreposition(phrase); + case "quotation": + return []; } } -export function somePhraseInMultiplePhrases( - phrases: MultiplePhrases, - checker: (modifier: Phrase) => boolean, -): boolean { - if (phrases.type === "single") { - return checker(phrases.phrase); - } else if (phrases.type === "and conjunction" || phrases.type === "anu") { - return phrases.phrases.some((phrases) => - somePhraseInMultiplePhrases(phrases, checker) - ); - } else { - throw new Error("unreachable"); +function everyWordUnitInMultiplePhrases( + phrase: MultiplePhrases, +): Array { + switch (phrase.type) { + case "single": + return everyWordUnitInPhrase(phrase.phrase); + case "and conjunction": + case "anu": + return phrase.phrases.flatMap(everyWordUnitInMultiplePhrases); } } -export function someObjectInMultiplePredicate( +function everyWordUnitInPreposition(preposition: Preposition): Array { + return [ + preposition.preposition, + ...preposition.modifiers.flatMap(everyWordUnitInModifier), + ...everyWordUnitInMultiplePhrases(preposition.phrases), + ]; +} +function everyWordUnitInMultiplePredicates( predicate: MultiplePredicates, - checker: (object: Phrase) => boolean, -): boolean { - if (predicate.type === "single") { - return false; - } else if (predicate.type === "associated") { - if (predicate.objects) { - return somePhraseInMultiplePhrases(predicate.objects, checker); - } else { - return false; - } - } else if (predicate.type === "and conjunction" || predicate.type === "anu") { - return predicate.predicates.some((predicates) => - someObjectInMultiplePredicate(predicates, checker) - ); - } else { - throw new Error("unreachable"); +): Array { + switch (predicate.type) { + case "single": + return everyWordUnitInPhrase(predicate.predicate); + 
case "associated": + return [ + ...everyWordUnitInMultiplePhrases(predicate.predicates), + ...nullableAsArray(predicate.objects) + .flatMap(everyWordUnitInMultiplePhrases), + ...predicate.prepositions.flatMap(everyWordUnitInPreposition), + ]; + case "and conjunction": + case "anu": + return predicate.predicates.flatMap(everyWordUnitInMultiplePredicates); } } +function everyWordUnitInClause(clause: Clause): Array { + switch (clause.type) { + case "phrases": + case "o vocative": + return everyWordUnitInMultiplePhrases(clause.phrases); + case "li clause": + return [ + ...everyWordUnitInMultiplePhrases(clause.subjects), + ...everyWordUnitInMultiplePredicates(clause.predicates), + ]; + case "o clause": + return [ + ...nullableAsArray(clause.subjects) + .flatMap(everyWordUnitInMultiplePhrases), + ...everyWordUnitInMultiplePredicates(clause.predicates), + ]; + case "prepositions": + return clause.prepositions.flatMap(everyWordUnitInPreposition); + case "quotation": + return []; + } +} +export function everyWordUnitInFullClause(clause: FullClause): Array { + switch (clause.type) { + case "default": + return [ + ...nullableAsArray(clause.kinOrTaso), + ...everyWordUnitInClause(clause.clause), + ...nullableAsArray(clause.anuSeme), + ]; + case "filler": + return []; + } +} +export function everyWordUnitInSentence(sentence: Sentence): Array { + return [...sentence.laClauses, sentence.finalClause] + .flatMap(everyWordUnitInFullClause); +} diff --git a/src/composer.ts b/src/composer.ts new file mode 100644 index 0000000..3041b02 --- /dev/null +++ b/src/composer.ts @@ -0,0 +1,122 @@ +import { + AdjectivePhrase, + Clause, + NounPhrase, + Preposition, + Sentence, + Word, +} from "./english-ast.ts"; +import { TodoError } from "./error.ts"; +import { nullableAsArray } from "./misc.ts"; +import { Output, OutputError } from "./output.ts"; +import { translate as translateToAst } from "./translator.ts"; + +const emphasisStartingTag = ""; +const emphasisEndingTag = ""; + +function 
word(word: Word): string { + if (word.emphasis) { + return `${emphasisStartingTag}${word.word}${emphasisEndingTag}`; + } else { + return word.word; + } +} +function compound( + elements: Array, + conjunction: string, + depth: number, +): string { + if (depth !== 0 || elements.length === 2) { + return elements.join(` ${conjunction} `); + } else { + const lastIndex = elements.length - 1; + const init = elements.slice(0, lastIndex); + const last = elements[lastIndex]; + return `${init.join(", ")} ${conjunction} ${last}`; + } +} +function noun(phrases: NounPhrase, depth: number): string { + switch (phrases.type) { + case "simple": { + const text = [ + ...phrases.determiner.map((determiner) => word(determiner.determiner)), + ...phrases.adjective.map(adjective), + word(phrases.noun), + ...nullableAsArray(phrases.postCompound).map(noun), + ...nullableAsArray(phrases.postAdjective) + .map((adjective) => `${adjective.adjective} ${adjective.name}`), + ...phrases.preposition.map(preposition), + ].join(" "); + return word({ word: text, emphasis: phrases.emphasis }); + } + case "compound": + return compound( + phrases.nouns.map((phrase) => noun(phrase, depth + 1)), + phrases.conjunction, + depth, + ); + } +} +function adjective(phrases: AdjectivePhrase, depth: number): string { + let text: string; + switch (phrases.type) { + case "simple": + text = [...phrases.adverb.map(word), word(phrases.adjective)] + .join(" "); + break; + case "compound": + text = compound( + phrases.adjective.map((phrase) => adjective(phrase, depth + 1)), + phrases.conjunction, + depth, + ); + } + return word({ word: text, emphasis: phrases.emphasis }); +} +function preposition(preposition: Preposition): string { + return `${word(preposition.preposition)} ${noun(preposition.object, 0)}`; +} +function clause(ast: Clause): string { + switch (ast.type) { + case "free form": + return ast.text; + case "interjection": + return word(ast.interjection); + case "implied it's": { + const verb = ast.verb; + let text: 
string; + switch (verb.type) { + case "linking noun": + text = noun(verb.noun, 0); + break; + case "linking adjective": + text = adjective(verb.adjective, 0); + break; + } + return [text!, ...verb.preposition.map(preposition)].join(" "); + } + case "subject phrase": + return noun(ast.subject, 0); + case "vocative": + return `${ast.call} ${noun(ast.addressee, 0)}`; + case "dependent": + return `${ast.conjunction} ${clause(ast.clause)}`; + default: + throw new TodoError(`composing ${ast.type}`); + } +} +function sentence(sentence: Sentence): string { + return `${sentence.clauses.map(clause).join(", ")}${sentence.punctuation}`; +} +export function translate(src: string): Output { + try { + return translateToAst(src) + .map((sentences) => sentences.map(sentence).join(" ")); + } catch (error) { + if (error instanceof OutputError) { + return new Output(error); + } else { + throw error; + } + } +} diff --git a/src/definition.ts b/src/definition.ts deleted file mode 100644 index a77abe5..0000000 --- a/src/definition.ts +++ /dev/null @@ -1,1761 +0,0 @@ -// TODO: avoid certain adjectives to be modified by adverb: this applies to -// possessive adjectives: my, our, your, etc. - -/** Represents possible translations of words. */ -export type Translation = { - noun: Array; - adjective: Array; - adverb: Array; - - // Verb definitions usable as noun or adjective - gerundVerb: Array; - - // Transitive means there's work applying to the object (or just sensing it) - pastTransitive: Array; - presentTransitive: Array; - - // Intransitive means there's nothing done to anything but the subject itself - pastIntransitive: Array; - presentIntransitive: Array; - - interjection?: Array; -}; -/** Record of word translations. 
*/ -export const DEFINITION: { [key: string]: Translation } = { - // All Linku definitions are gathered from: - // https://github.com/lipu-linku/sona/blob/main/words/translations/eng/definitions.toml - // Last commit used: 69ecccb - - // TODO: preverb - // TODO: preposition - - // akesi - // Linku: reptile, amphibian - akesi: { - noun: ["reptile", "reptiles", "amphibian", "amphibians"], - adjective: ["reptilian", "amphibian"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ala - // Linku: no, not, zero; [~ ala ~] (used to form a yes-no question); nothing - ala: { - noun: ["nothing", "no", "zero"], - adjective: ["not", "no", "zero"], - adverb: ["not"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // alasa - // Linku: hunt, search, forage, attempt; (preverb) try to - alasa: { - noun: [], - adjective: [], - adverb: [], - gerundVerb: ["searching"], - pastTransitive: ["hunted", "searched"], - presentTransitive: ["hunt", "search"], - pastIntransitive: [], - presentIntransitive: [], - }, - // ale/ali - // Linku: all, every, everything, entirety; any, anything; (number) one hundred - ale: { - noun: ["everything", "anything", "entirety", "100"], - adjective: ["all", "every", "100"], - adverb: ["completely"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - ali: { - noun: ["everything", "anything", "entirety", "100"], - adjective: ["all", "every", "100"], - adverb: ["completely"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // anpa - // Linku: bowing down, downward, humble, lowly, dependent | ALT bottom, lower part, under, below, floor, beneath; low, lower, bottom, down - // Duplicates: - // - bottom, lower part - bottom is preferred due to having one word 
- // - under, below - below is preferred - anpa: { - noun: ["bottom", "below", "floor"], - adjective: [ - "bowing down", - "downward", - "humble", - "lowly", - "dependent", - "low", - "lower", - "bottom", - "down", - ], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ante - // Linku: change, difference, modification; other, altered; to modify - ante: { - noun: [ - "change", - "changes", - "difference", - "differences", - "modification", - "modifications", - ], - adjective: ["different", "other", "altered"], - adverb: [], - gerundVerb: [], - pastTransitive: ["modified"], - presentTransitive: ["modify"], - pastIntransitive: [], - presentIntransitive: [], - }, - // awen - // Linku: enduring, kept, protected, safe, waiting, staying; (pv.) to continue to, to keep - // Duplicates: - // - waiting, staying (not really, one could stay and not wait, one could wait and not stay) - awen: { - noun: ["waiting", "staying"], - adjective: ["enduring", "kept", "protected", "safe", "waiting", "staying"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // esun - // Linku: trade, barter, exchange, swap, buy, sell; market, shop, fair, bazaar, place of business - // Duplicates: - // - market, shop, fair, bazaar - shop is preferred - esun: { - noun: ["shop", "shops"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [ - "traded", - "bartered", - "exchanged", - "swapped", - "bought", - "sold", - ], - presentTransitive: ["trade", "barter", "exchange", "swap", "buy", "sell"], - pastIntransitive: [], - presentIntransitive: [], - }, - // ijo - // Linku: thing, phenomenon, object, matter - // - thing, object - object is preferred - ijo: { - noun: [ - "phenomenon", - "phenomenons", - "object", - "objects", - "matter", - "matters", - ], - adjective: [], - adverb: [], - gerundVerb: [], - 
pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ike - // Linku: negative quality, e.g. bad, unpleasant, harmful, unneeded - ike: { - noun: ["negative quality"], - adjective: ["bad", "unpleasant", "harmful", "unneeded"], - adverb: ["badly", "unpleasantly", "harmfully"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ilo - // Linku: tool, implement, machine, device - // Duplicates: - // - machine, device - machine is preferred - ilo: { - noun: ["tool", "tools", "implement", "implements", "machine", "machines"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // insa - // Linku: centre, content, inside, between; internal organ, stomach - insa: { - noun: ["centre", "content", "contents", "inside"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // jaki - // Linku: disgusting, obscene, sickly, toxic, unclean, unsanitary - // Duplicates: - // - disgusting, obscene - disgusting is preferred - // - unclean, unsanitary - unclean is preferred - jaki: { - noun: ["obscenity", "obscenities"], - adjective: ["disgusting", "sickly", "toxic", "unclean"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // jan - // Linku: human being, person, somebody - jan: { - noun: ["human being", "person", "people", "somebody"], - adjective: ["person-like"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // jelo - // Linku: yellow, amber, golden, lime yellow, yellowish orange - // Removed: amber could be confused with the tree resin - jelo: { - noun: ["yellow", "lime yellow", 
"yellowish orange"], - adjective: ["yellow", "golden", "lime yellow", "yellowish orange"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // jo - // Linku: to have, carry, contain, hold - jo: { - noun: ["possession", "possessions"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: ["had", "carried", "contained", "held"], - presentTransitive: ["have", "carry", "contain", "hold"], - pastIntransitive: [], - presentIntransitive: [], - }, - // kala - // Linku: fish, marine animal, sea creature - // Duplicates - // - marine animal, sea creature - sea creature is preferred - kala: { - noun: ["fish", "fishes", "sea creature", "sea creatures"], - adjective: ["fish-like", "swimming"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kalama - // Linku: to produce a sound; recite, utter aloud - kalama: { - noun: ["sound", "sounds"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: ["sounded"], - presentIntransitive: ["sound"], - }, - // kama - // Linku: arriving, coming, future, summoned; (pv.) 
to become, manage to, succeed in - // Duplicates: - // - arriving, coming - arriving is preferred - kama: { - noun: ["arriving", "future"], - adjective: ["arriving", "future", "summoned"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kasi - // Linku: plant, vegetation; herb, leaf - // Duplicates: - // - plant, vegetation - plant is preferred - kasi: { - noun: ["plant", "plants", "herb", "herbs", "leaf", "leaves"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ken - // Linku: to be able to, be allowed to, can, may; possible - ken: { - noun: ["ability", "abilities", "possibility", "possibilities"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kepeken - // Linku: to be able to, be allowed to, can, may; possible - kepeken: { - noun: [], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kili - // Linku: fruit, vegetable, mushroom - kili: { - noun: [ - "fruit", - "fruits", - "vegetable", - "vegetables", - "mushroom", - "mushrooms", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kiwen - // Linku: hard object, metal, rock, stone - kiwen: { - noun: [ - "hard object", - "hard objects", - "metal", - "metals", - "rock", - "rocks", - "stone", - "stones", - ], - adjective: ["hard"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ko - // Linku: semi-solid, e.g. 
paste, powder, goo, sand, soil, clay; squishy, moldable - ko: { - noun: ["semi-solid", "paste", "powder", "goo", "sand", "soil", "clay"], - adjective: ["squishy", "moldable"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kon - // Linku: air, breath; essence, spirit; hidden reality, unseen agent - kon: { - noun: [ - "air", - "breath", - "essence", - "spirit", - "hidden reality", - "unseen agent", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kule - // Linku: color, pigment; category, genre, flavor; relating to queerness, relating to the LGBT+ community - // Duplicates: - // - category, genre - category is preferred - kule: { - noun: [ - "color", - "colors", - "pigment", - "pigments", - "category", - "categories", - "flavor", - "flavors", - "queerness", - ], - adjective: ["colorful", "queer"], - adverb: ["colorfully"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kulupu - // Linku: community, company, group, nation, society, tribe - kulupu: { - noun: [ - "community", - "communities", - "company", - "companies", - "group", - "groups", - "nation", - "nations", - "society", - "tribe", - "tribes", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // kute - // Linku: ear; to hear, listen; pay attention to, obey - kute: { - noun: ["ear", "ears", "listening"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: ["listened"], - presentTransitive: ["listen"], - pastIntransitive: [], - presentIntransitive: [], - }, - // lape - // Linku: sleep, rest, break from an activity or work - lape: { - noun: ["sleep", "rest"], - adjective: ["sleeping", "resting"], - adverb: [], - 
gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: ["slept", "rested"], - presentIntransitive: ["sleep", "rest"], - }, - // laso - // Linku: turquoise, blue, green, cyan, indigo, lime green - laso: { - noun: ["turquoise", "blue", "green", "cyan", "indigo", "lime green"], - adjective: ["turquoise", "blue", "green", "cyan", "indigo", "lime green"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // lawa - // Linku: head, mind; to control, direct, guide, lead, own, plan, regulate, rule - lawa: { - noun: ["head", "heads", "mind", "minds", "guide", "plan", "rule"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [ - "controlled", - "directed", - "guided", - "led", - "owned", - "regulated", - "ruled", - ], - presentTransitive: [ - "control", - "direct", - "guide", - "lead", - "own", - "regulate", - "rule", - ], - pastIntransitive: [], - presentIntransitive: [], - }, - // len - // Linku: cloth, clothing, fabric, textile; cover, layer of privacy - // Duplicates: - // - cloth, fabric, textile - fabric is preferred - len: { - noun: ["clothing", "fabric", "hiding"], - adjective: ["hidden"], - adverb: [], - gerundVerb: [], - pastTransitive: ["covered"], - presentTransitive: ["cover"], - pastIntransitive: [], - presentIntransitive: [], - }, - // lete - // Linku: cool, cold, frozen; freeze - lete: { - noun: ["coldness"], - adjective: ["cool", "cold", "frozen"], - adverb: [], - gerundVerb: [], - pastTransitive: ["froze"], - presentTransitive: ["freeze"], - pastIntransitive: [], - presentIntransitive: [], - }, - // lili - // Linku: small, short, young; few; piece of - // TODO: cover "piece of" as special prefix - lili: { - noun: ["smallness"], - adjective: ["small", "short", "young", "few"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // linja - // 
Linku: long and flexible thing; cord, hair, rope, thread, yarn | ALT line, connection - // Duplicates: - // - thread, yarn - thread is preferred - linja: { - noun: [ - "long flexible thing", - "long flexible things", - "cord", - "cords", - "hair", - "rope", - "ropes", - "line", - "lines", - "connection", - "connections", - ], - adjective: ["long flexible"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // lipu - // Linku: flat object; book, document, card, paper, record, website - lipu: { - noun: [ - "flat object", - "book", - "books", - "document", - "documents", - "card", - "cards", - "paper", - "papers", - "record", - "records", - "website", - "websites", - ], - adjective: ["flat"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // loje - // Linku: red, magenta, scarlet, pink, rust-colored, reddish orange - loje: { - noun: ["red", "magenta", "scarlet", "pink", "rust-color", "reddish orange"], - adjective: [ - "red", - "magenta", - "scarlet", - "pink", - "rust-colored", - "reddish orange", - ], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // lon - // Linku: located at, present at, real, true, existing - lon: { - noun: ["truth"], - adjective: ["real", "truth", "existing"], - adverb: ["truthfully"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // luka - // Linku: hand, arm, tactile organ, grasping organ; (number) five - luka: { - noun: [ - "hand", - "hands", - "arm", - "arms", - "tactile organ", - "tactile organs", - "grasping organ", - "grasping organs", - "5", - ], - adjective: ["5"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - 
// lukin - // Linku: look, view, examine, read, watch; appearance, visual; eye, seeing organ; (preverb) try to - // Duplicate: - // - look and view - view is preferred - lukin: { - noun: ["appearance", "visual", "eye", "seeing organ", "seeing organs"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: ["viewed", "read", "watched"], - presentTransitive: ["view", "read", "watch"], - pastIntransitive: [], - presentIntransitive: [], - }, - // lupa - // Linku: hole, pit, cave, doorway, window, portal - lupa: { - noun: [ - "hole", - "holes", - "pit", - "cave", - "caves", - "doorway", - "window", - "windows", - "portal", - "portals", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ma - // Linku: earth, land; outdoors, world; country, territory; soil - ma: { - noun: [ - "earth", - "land", - "outdoors", - "world", - "country", - "territory", - "soil", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // mama - // Linku: parent, ancestor; creator, originator; caretaker, sustainer, guardian - mama: { - noun: [ - "parent", - "parents", - "ancestor", - "ancestors", - "creator", - "creators", - "originator", - "originators", - "caretaker", - "caretakers", - "sustainer", - "guardian", - "guardians", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // mani - // Linku: money, cash, savings, wealth; large domesticated animal - mani: { - noun: [ - "money", - "cash", - "savings", - "wealth", - "large domestic animal", - "large domestic animals", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // meli - // Linku: woman, female, 
feminine person; wife - meli: { - noun: [ - "woman", - "women", - "female", - "feminine person", - "feminine people", - "wife", - ], - adjective: ["woman", "female", "feminine"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // mi - // Linku: I, me, we, us - mi: { - noun: ["I", "me", "we", "us"], - adjective: ["my", "our"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // mije - // Linku: man, male, masculine person; husband - mije: { - noun: [ - "man", - "men", - "male", - "masculine person", - "masculine people", - "husband", - ], - adjective: ["man", "male", "masculine"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // moku - // Linku: to eat, drink, consume, swallow, ingest - // Duplicate: - // - swallow, ingest - either way is fine - // NOTE: should I use drank or drunk? 
- moku: { - noun: ["food", "foods", "drink", "drinks"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: ["ate", "drank", "consumed", "ingested"], - presentTransitive: ["eat", "drink", "consume", "ingest"], - pastIntransitive: [], - presentIntransitive: [], - }, - // moli - // Linku: dead, dying - moli: { - noun: ["death"], - adjective: ["dead", "dying"], - adverb: [], - gerundVerb: [], - pastTransitive: ["killed"], - presentTransitive: ["kill"], - pastIntransitive: [], - presentIntransitive: [], - }, - // monsi - // Linku: back, behind, rear - monsi: { - noun: ["back", "behind", "rear"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // mu - // Linku: (animal noise or communication) | ALT (non-speech vocalization) - // TODO: this is a tricky word - mu: { - noun: ["*noises*"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // mun - // Linku: moon, night sky object, star, celestial body - mun: { - noun: [ - "moon", - "night sky object", - "night sky objects", - "star", - "stars", - "celestial body", - "celestial bodies", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // musi - // Linku: fun, game, entertainment, art, play, amusing, interesting, comical, silly - musi: { - noun: ["fun", "game", "games", "entertainment", "art", "arts", "play"], - adjective: ["fun", "amusing", "interesting", "comical", "silly"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: ["had fun"], - presentIntransitive: ["have fun"], - }, - // mute - // Linku: many, several, very; quantity; (number) twenty - mute: { - noun: ["many", "20"], - adjective: ["many", "several"], - adverb: ["very"], - gerundVerb: 
[], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // nanpa - // Linku: -th (ordinal number); numbers - nanpa: { - noun: ["number", "numbers"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // nasa - // Linku: unusual, strange; silly; drunk, intoxicated - nasa: { - noun: ["silliness", "strangeness"], - adjective: ["unusual", "strange", "silly", "drunk", "intoxicated"], - adverb: ["strangely"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // nasin - // Linku: way, custom, doctrine, method, path, road - // Duplicate: - // - path, road - path is preferred - nasin: { - noun: [ - "way", - "ways", - "custom", - "customs", - "doctrine", - "doctrines", - "method", - "methods", - "path", - "paths", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // nena - // Linku: bump, button, hill, mountain, nose, protuberance - nena: { - noun: [ - "bump", - "bumps", - "hill", - "hills", - "mountain", - "nose", - "noses", - "protuberance", - "protuberances", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // ni - // Linku: that, this - ni: { - noun: ["this", "that"], - adjective: ["this", "that"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // nimi - // Linku: name, word - nimi: { - noun: ["name", "names", "word", "words"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // noka - // Linku: foot, leg, organ of locomotion, roots - noka: 
{ - noun: [ - "foot", - "feet", - "leg", - "legs", - "locomotive organ", - "locomotive organs", - "roots", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // olin - // Linku: to have a strong emotional bond (with), e.g. affection, appreciation, (respect), platonic, romantic or familial relationships - olin: { - noun: ["affection", "appreciation", "respect", "relationship"], - adjective: ["platonic", "romantic", "familial"], - adverb: [], - gerundVerb: [], - pastTransitive: ["respected"], - presentTransitive: ["respect"], - pastIntransitive: [], - presentIntransitive: [], - }, - // ona - // Linku: he, she, it, they - // Removed he and she - ona: { - noun: ["they", "them", "it"], - adjective: ["their", "its"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // open - // Linku: begin, start; open; turn on - open: { - noun: ["beginning", "start"], - adjective: ["open", "turned on"], - adverb: [], - gerundVerb: [], - pastTransitive: ["started", "turned on"], - presentTransitive: ["start", "turn on"], - pastIntransitive: [], - presentIntransitive: [], - }, - // pakala - // Linku: botched, broken, damaged, harmed, messed up | ALT (curse expletive, e.g. fuck!) - pakala: { - noun: ["mess", "damage", "damages"], - adjective: ["botched", "broken", "damaged", "harmed", "messed up"], - adverb: [], - gerundVerb: [], - pastTransitive: ["botched", "broke", "damaged", "harmed", "messed up"], - presentTransitive: ["botch", "break", "damage", "harm", "mess up"], - pastIntransitive: [], - presentIntransitive: [], - interjection: ["pakala"], - }, - // pan - // Linku: grains, starchy foods, baked goods; e.g. rice, sorghum, bread, noodles, masa, porridge, injera - // NOTE: should I put these examples? 
NAH - pan: { - noun: ["grain", "grains", "starchy food", "starchy foods", "baked goods"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // pana - // Linku: give, send, emit, provide, put, release - pana: { - noun: [], - adjective: [], - adverb: [], - gerundVerb: ["giving"], - pastTransitive: ["gave", "sent", "emitted", "provided", "put", "released"], - presentTransitive: ["give", "send", "emit", "provide", "put", "release"], - pastIntransitive: [], - presentIntransitive: [], - }, - // pali - // Linku: do, take action on, work on; build, make, prepare - pali: { - noun: [], - adjective: [], - adverb: [], - gerundVerb: ["doing", "working", "build", "made", "prepared"], - pastTransitive: ["do", "work", "build", "make", "prepare"], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // palisa - // Linku: long hard thing; branch, rod, stick - palisa: { - noun: [ - "long hard thing", - "long hard things", - "branch", - "branches", - "rod", - "rods", - "stick", - "sticks", - ], - adjective: ["long hard"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // pilin - // Linku: heart (physical or emotional); feeling (an emotion, a direct experience) - pilin: { - noun: ["heart", "feeling", "feelings"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: ["touched", "felt"], - presentTransitive: ["touch", "feel"], - pastIntransitive: [], - presentIntransitive: [], - }, - // pimeja - // Linku: dark, unlit; dark color, e.g. 
black, purple, brown - // Duplicates: - // - dark, unlit - dark is preferred - pimeja: { - noun: ["darkness", "dark color"], - adjective: ["dark", "unlit", "black", "purple", "brown"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // pini - // Linku: ago, completed, ended, finished, past - // Duplicates: - // - completed, ended, finished - ended is preferred - pini: { - noun: ["past"], - adjective: ["ago", "ended"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // pipi - // Linku: bug, insect, ant, spider - // Overlap: - // - ant as insect - // - spider as bug - pipi: { - noun: ["insect", "insects", "bug", "bugs"], - adjective: ["bug-like", "insect-like"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // poka - // Linku: hip, side; next to, nearby, vicinity | ALT along with (comitative), beside - poka: { - noun: ["hip", "hips", "side", "sides", "vicinity"], - adjective: ["nearby"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // poki - // Linku: container, bag, bowl, box, cup, cupboard, drawer, vessel - // Overlaps - poki: { - noun: ["container", "containers"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // pona - // Linku: good, positive, useful; friendly, peaceful; simple - pona: { - noun: ["goodness", "simplicity"], - adjective: ["good", "positive", "useful", "friendly", "peaceful", "simple"], - adverb: ["nicely"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // pu - // Linku: to interact with the book Toki Pona: The 
Language of Good (2014) by Sonja Lang - // TODO: Maybe special suffix: "with the book", "related to the book" - pu: { - noun: [], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sama - // Linku: same, similar; each other; sibling, peer, fellow; as, like - sama: { - noun: ["similarity", "sibling", "peer", "fellow"], - adjective: ["same", "similar"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // seli - // Linku: fire; cooking element, chemical reaction, heat source - seli: { - noun: [ - "fire", - "cooking element", - "cooking elements", - "chemical reaction", - "chemical reactions", - "heat source", - "heat sources", - ], - adjective: ["hot"], - adverb: [], - gerundVerb: [], - pastTransitive: ["heated"], - presentTransitive: ["heat"], - pastIntransitive: [], - presentIntransitive: [], - }, - // selo - // Linku: outer form, outer layer; bark, peel, shell, skin; boundary - // Duplicates: - // - bark, peel, shell, skin - skin is preferred - selo: { - noun: ["outer form", "outer layer", "skin", "boundary"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // seme - // Linku: what? which? 
- seme: { - noun: ["what", "which"], - adjective: ["what", "which"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sewi - // Linku: area above, highest part, something elevated; awe-inspiring, divine, sacred, supernatural - sewi: { - noun: ["area above", "highest part"], - adjective: [ - "highest", - "elevated", - "awe-inspiring", - "divine", - "sacred", - "supernatural", - ], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sijelo - // Linku: body (of person or animal), physical state, torso - sijelo: { - noun: ["body", "bodies", "physical state", "physical states", "torso"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sike - // Linku: round or circular thing; ball, circle, cycle, sphere, wheel; of one year - sike: { - noun: ["round thing", "round things", "cycle"], - adjective: ["round"], - adverb: ["repeatedly"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sin - // Linku: new, fresh; additional, another, extra - sin: { - noun: ["newness"], - adjective: ["new", "fresh", "additional", "another", "extra"], - adverb: ["newly"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sina - // Linku: you - sina: { - noun: ["you"], - adjective: ["your"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sinpin - // Linku: face, foremost, front, wall - sinpin: { - noun: ["face", "faces", "wall", "walls"], - adjective: ["foremost"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - 
presentIntransitive: [], - }, - // sitelen - // Linku: image, picture, representation, symbol, mark, writing - sitelen: { - noun: [ - "image", - "images", - "picture", - "pictures", - "representation", - "symbol", - "symbols", - "mark", - "marks", - "writing", - "writings", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // sona - // Linku: know, be skilled in, be wise about, have information on; (pv.) know how to - sona: { - noun: ["knowledge"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // soweli - // Linku: fuzzy creature, land animal, beast - soweli: { - noun: [ - "fuzzy creature", - "fuzzy creatures", - "land animal", - "land animals", - "beast", - "beasts", - ], - adjective: ["animal-like"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // suli - // Linku: big, heavy, large, long, tall; important; adult - // Duplicates: - // - big, large - big is preferred - suli: { - noun: ["hugeness", "importance"], - adjective: ["big", "heavy", "important", "adult"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // suno - // Linku: sun; light, brightness, glow, radiance, shine; light source - suno: { - noun: ["sun", "light", "brightness", "glow", "radiance", "light source"], - adjective: ["shining"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // supa - // Linku: horizontal surface, thing to put or rest something on - supa: { - noun: ["horizontal surface", "horizontal surfaces"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - 
pastIntransitive: [], - presentIntransitive: [], - }, - // suwi - // Linku: sweet, fragrant; cute, innocent, adorable - // Duplicates: - // - cute, adorable - cute is preferred - suwi: { - noun: ["sweetness", "cuteness", "innocence"], - adjective: ["sweet", "cute", "innocent"], - adverb: ["sweetly"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // tan - // Linku: by, from, because of; origin, cause - tan: { - noun: ["origin", "cause"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // taso - // Linku: but, however; only - taso: { - noun: [], - adjective: ["only"], - adverb: ["only"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // tawa - // Linku: motion, e.g. walking, shaking, flight, travel; (preposition) to, for, going to, from the perspective of - // Duplicates: - // - flight, travel - travel is preferred - tawa: { - noun: ["motion", "travel"], - adjective: ["walking", "shaking"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // telo - // Linku: water, liquid, fluid, wet substance; beverages - // Duplicates: - // - liquid, fluid - liquid is preferred - telo: { - noun: ["water", "liquid", "fluid", "wet substance", "beverages"], - adjective: ["liquid", "wet"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // tenpo - // Linku: time, duration, moment, occasion, period, situation - tenpo: { - noun: ["time", "duration", "moment", "occasion", "period", "situation"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // toki - // 
Linku: communicate, say, speak, talk, use language, think; hello - // These are all overlaps - toki: { - noun: [ - "communication", - "communications", - "language", - "languages", - ], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: ["communicated"], - presentTransitive: ["communicate"], - pastIntransitive: [], - presentIntransitive: [], - interjection: ["hello"], - }, - // tomo - // Linku: indoor space; building, home, house, room - // Duplicates: - // - indoor space, room - room is preferred - tomo: { - noun: ["building", "home", "house", "room"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // tonsi - // Linku: nonbinary, gender nonconforming, genderqueer, transgender* - tonsi: { - noun: [ - "non-binary person", - "non-binary people", - "gender nonconforming person", - "gender nonconforming people", - "genderqueer person", - "genderqueer people", - "transgender person", - "transgender people", - ], - adjective: [ - "non-binary", - "gender-nonconforming", - "genderqueer", - "transgender", - ], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // tu - // Linku: (number) two; separate, divide, split; multiply, duplicate - tu: { - noun: ["2"], - adjective: ["2"], - adverb: [], - gerundVerb: [], - pastTransitive: [ - "separated", - "divided", - "split", - "multiplied", - "duplicated", - ], - presentTransitive: ["separate", "divide", "split", "multiply", "duplicate"], - pastIntransitive: [], - presentIntransitive: [], - }, - // unpa - // Linku: have sexual relations with - unpa: { - noun: ["sex"], - adjective: ["sexual"], - adverb: ["sexually"], - gerundVerb: [], - pastTransitive: ["had sex with"], - presentTransitive: ["have sex with"], - pastIntransitive: [], - presentIntransitive: [], - }, - // uta - // Linku: mouth, lips, oral cavity, jaw - uta: { - 
noun: ["mouth", "lips", "oral cavity", "jaw"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // utala - // Linku: battle, challenge, compete against, struggle against - utala: { - noun: ["battle", "challenge"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: ["competed against", "struggled against"], - presentTransitive: ["compete against", "struggle against"], - pastIntransitive: [], - presentIntransitive: [], - }, - // walo - // Linku: light-colored, white, pale, light gray, cream - // Removed cream - it may be confused with the actual thing - walo: { - noun: ["light-color", "white", "light gray"], - adjective: ["light-colored", "white", "pale", "light gray"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // wan - // Linku: (number) one; singular; combine, join, mix, fuse - wan: { - noun: ["1"], - adjective: ["1", "singular"], - adverb: [], - gerundVerb: [], - pastTransitive: ["combined", "mixed", "fused"], - presentTransitive: ["combine", "mix", "fuse"], - pastIntransitive: [], - presentIntransitive: [], - }, - // waso - // Linku: bird, flying creature, winged animal - waso: { - noun: [ - "bird", - "birds", - "flying creature", - "flying creatures", - "winged animal", - "winged animals", - ], - adjective: ["bird-like", "flying"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // wawa - // Linku: strong, powerful; confident, sure; energetic, intense - wawa: { - noun: ["power", "powers", "confidence", "energy", "intensity"], - adjective: ["strong", "powerful", "confident", "energetic", "intense"], - adverb: ["powerfully"], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, - // weka - // 
Linku: absent, away, ignored - weka: { - noun: ["leaving"], - adjective: ["absent", "away", "ignored"], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: ["leave"], - presentIntransitive: ["leave"], - }, - // wile - // Linku:want, desire, wish, require; (preverb) want to - wile: { - noun: ["want", "wants", "need", "needs"], - adjective: [], - adverb: [], - gerundVerb: [], - pastTransitive: [], - presentTransitive: [], - pastIntransitive: [], - presentIntransitive: [], - }, -}; diff --git a/src/english-ast.ts b/src/english-ast.ts new file mode 100644 index 0000000..166b3e6 --- /dev/null +++ b/src/english-ast.ts @@ -0,0 +1,106 @@ +/** Module for describing English AST. */ + +import { + AdjectiveType, + DeterminerType as OriginalDeterminerType, +} from "dictionary/type.ts"; + +export type Word = { + word: string; + emphasis: boolean; +}; +export type Quantity = "singular" | "plural" | "both" | "condensed"; +export type DeterminerType = OriginalDeterminerType | "numeral"; +export type NounPhrase = + | { + type: "simple"; + determiner: Array; + adjective: Array; + noun: Word; + number: Quantity; + postCompound: null | NounPhrase; + postAdjective: null | { adjective: string; name: string }; + preposition: Array; + emphasis: boolean; + } + | { + type: "compound"; + conjunction: string; + nouns: Array; + number: Quantity; + }; +export type Determiner = { + kind: DeterminerType; + determiner: Word; + number: Quantity; +}; +export type AdjectivePhrase = + | { + type: "simple"; + kind: AdjectiveType; + adverb: Array; + adjective: Word; + emphasis: boolean; + } + | { + type: "compound"; + conjunction: string; + adjective: Array; + emphasis: boolean; + }; +export type VerbPhrase = + | { + type: "default"; + adverb: Array; + verb: Word; + preposition: Array; + } + | { + type: "linking noun"; + linkingVerb: Word; + noun: NounPhrase; + preposition: Array; + } + | { + type: "linking adjective"; + linkingVerb: Word; + adjective: 
AdjectivePhrase; + preposition: Array; + } + | { + type: "compound"; + conjunction: Word; + verbs: VerbPhrase; + preposition: Array; + }; +export type Clause = + | { type: "free form"; text: string } + | { + type: "default"; + subject: NounPhrase; + verb: VerbPhrase; + object: null | NounPhrase; + preposition: Array; + } + | { type: "subject phrase"; subject: NounPhrase } + | { + type: "implied it's"; + verb: VerbPhrase; + } + | { type: "interjection"; interjection: Word } + | { type: "vocative"; call: string; addressee: NounPhrase } + | { + type: "compound"; + conjunction: string; + clauses: Array; + preposition: Array; + } + | { type: "dependent"; conjunction: Word; clause: Clause }; +export type Preposition = { + preposition: Word; + object: NounPhrase; +}; +export type Sentence = { + clauses: Array; + punctuation: string; +}; diff --git a/src/error.ts b/src/error.ts index d9d67f6..8af8cdf 100644 --- a/src/error.ts +++ b/src/error.ts @@ -1,12 +1,11 @@ -/** Represents Error used by `Output`. */ -export class OutputError extends Error {} -/** - * Represents errors that cannot be seen. This includes errors expected to be - * unreached as well as errors expected to be covered by non-error outputs. - */ -export class UnreachableError extends OutputError { - constructor() { - super("This is an error you shouldn't see... Please report this error."); +/** Module for Error datatypes. */ + +import { OutputError } from "./output.ts"; + +/** Represents Error with unexpected and expected elements. */ +export class UnexpectedError extends OutputError { + constructor(unexpected: string, expected: string) { + super(`Unexpected ${unexpected}. ${expected} were expected instead.`); } } /** Represents Error due to things not implemented yet. 
*/ diff --git a/src/filter.ts b/src/filter.ts index 856b678..4b0934f 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -1,17 +1,20 @@ -import { somePhraseInMultiplePhrases } from "./ast.ts"; -import { MultiplePhrases } from "./ast.ts"; +/** Module describing filter rules integrated within AST Parser. */ + import { Clause, + Emphasis, + everyWordUnitInSentence, FullClause, Modifier, + MultiplePhrases, + MultiplePredicates, Phrase, Preposition, Sentence, - someModifierInPhrase, - someObjectInMultiplePredicate, WordUnit, } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; +import { describe } from "./token.ts"; /** Array of filter rules for a word unit. */ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ @@ -22,13 +25,12 @@ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ } return true; }, - // avoid reduplication of "wan" and "tu" + // "n" and multiple "a" cannot modify a word (wordUnit) => { - if ( - wordUnit.type === "reduplication" && - (wordUnit.word === "wan" || wordUnit.word === "tu") - ) { - throw new UnrecognizedError(`reduplication of ${wordUnit.word}`); + if (isMultipleAOrN(wordUnit.emphasis)) { + throw new UnrecognizedError( + `${describe(wordUnit.emphasis!)} modifying a word`, + ); } return true; }, @@ -72,23 +74,23 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ }, // nanpa construction cannot contain pi (modifier) => { - if (modifier.type === "nanpa" && modifier.phrase.type === "default") { - if ( - modifier.phrase.modifiers.some((modifier) => modifier.type === "pi") - ) { - throw new UnrecognizedError("pi inside nanpa"); - } + if ( + modifier.type === "nanpa" && + modifier.phrase.type === "default" && + modifier.phrase.modifiers.some((modifier) => modifier.type === "pi") + ) { + throw new UnrecognizedError("pi inside nanpa"); } return true; }, // nanpa construction cannot contain nanpa (modifier) => { - if (modifier.type === "nanpa" && modifier.phrase.type === 
"default") { - if ( - modifier.phrase.modifiers.some((modifier) => modifier.type === "nanpa") - ) { - throw new UnrecognizedError("nanpa inside nanpa"); - } + if ( + modifier.type === "nanpa" && + modifier.phrase.type === "default" && + modifier.phrase.modifiers.some((modifier) => modifier.type === "nanpa") + ) { + throw new UnrecognizedError("nanpa inside nanpa"); } return true; }, @@ -102,7 +104,7 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ // pi must follow phrases with modifier (modifier) => { if (modifier.type === "pi") { - const phrase = modifier.phrase; + const { phrase } = modifier; if (phrase.type === "default" && phrase.modifiers.length === 0) { throw new UnrecognizedError("pi followed by one word"); } @@ -112,17 +114,15 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ // pi cannot be nested (modifier) => { const checker = (modifier: Modifier) => { - if ( - modifier.type === "default" || modifier.type === "proper words" || - modifier.type === "quotation" - ) { - return false; - } else if (modifier.type === "nanpa") { - return someModifierInPhrase(modifier.phrase, false, checker); - } else if (modifier.type === "pi") { - return true; - } else { - throw new Error("unreachable error"); + switch (modifier.type) { + case "default": + case "proper words": + case "quotation": + return false; + case "nanpa": + return someModifierInPhrase(modifier.phrase, false, checker); + case "pi": + return true; } }; if (modifier.type === "pi") { @@ -132,9 +132,45 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ } return true; }, + // pi cannot have emphasis particle + (modifier) => { + if (modifier.type === "pi") { + const phrase = modifier.phrase; + if ( + ( + phrase.type === "default" || + phrase.type === "preverb" || + phrase.type === "preposition" + ) && + phrase.emphasis != null + ) { + return false; + } + } + return true; + }, + // nanpa cannot have emphasis particle + (modifier) => { + 
if (modifier.type === "nanpa") { + const phrase = modifier.phrase; + if ( + ( + phrase.type === "default" || + phrase.type === "preverb" || + phrase.type === "preposition" + ) && + phrase.emphasis != null + ) { + return false; + } + } + return true; + }, ]; /** Array of filter rules for multiple modifiers. */ -export const MODIFIERS_RULES: Array<(modifier: Array) => boolean> = [ +export const MULTIPLE_MODIFIERS_RULES: Array< + (modifier: Array) => boolean +> = [ // no multiple pi (modifiers) => { if (modifiers.filter((modifier) => modifier.type === "pi").length > 1) { @@ -166,6 +202,42 @@ export const MODIFIERS_RULES: Array<(modifier: Array) => boolean> = [ } return true; }, + // avoid duplicate modifiers + (modifiers) => { + const set = new Set(); + for (const modifier of modifiers) { + let word: string; + switch (modifier.type) { + case "default": + if (modifier.word.type !== "number") { + word = modifier.word.word; + break; + } else { + continue; + } + case "pi": + if ( + modifier.phrase.type === "default" && + modifier.phrase.headWord.type !== "number" + ) { + word = modifier.phrase.headWord.word; + break; + } else { + continue; + } + case "quotation": + case "proper words": + case "nanpa": + continue; + } + if (set.has(word)) { + throw new UnrecognizedError(`duplicate "${word}" in modifier`); + } else { + set.add(word); + } + } + return true; + }, ]; /** Array of filter rules for a single phrase. 
*/ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ @@ -176,34 +248,49 @@ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ } return true; }, - // Disallow preverb modifiers other than _ala_ + // Disallow preverb modifiers other than "ala" (phrase) => { - if (phrase.type === "preverb") { - if (!modifiersIsAlaOrNone(phrase.modifiers)) { - throw new UnrecognizedError('preverb with modifiers other than "ala"'); - } + if (phrase.type === "preverb" && !modifiersIsAlaOrNone(phrase.modifiers)) { + throw new UnrecognizedError('preverb with modifiers other than "ala"'); } return true; }, // No multiple number words (phrase) => { - if (phrase.type === "default") { - if ( - phrase.headWord.type === "numbers" || - (phrase.headWord.type === "default" && - (phrase.headWord.word === "wan" || phrase.headWord.word === "tu")) - ) { - if (phrase.modifiers.some(modifierIsNumeric)) { - throw new UnrecognizedError("Multiple number words"); - } - } + if ( + phrase.type === "default" && + phrase.headWord.type === "number" && + phrase.modifiers.some(modifierIsNumeric) + ) { + throw new UnrecognizedError("Multiple number words"); + } + return true; + }, + // If the phrase has no modifiers, avoid emphasis particle + (phrase) => + phrase.type !== "default" || + phrase.emphasis == null || + phrase.modifiers.length > 0, + // "n" and multiple "a" cannot modify a phrase + (wordUnit) => { + if ( + (wordUnit.type === "default" || wordUnit.type === "preverb") && + isMultipleAOrN(wordUnit.emphasis) + ) { + throw new UnrecognizedError( + `${describe(wordUnit.emphasis!)} modifying a word`, + ); } return true; }, + // For preverbs, inner phrase must not have emphasis particle + (phrase) => + phrase.type !== "preverb" || + !phraseHasTopLevelEmphasis(phrase.phrase), ]; /** Array of filter rules for preposition. 
*/ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ - // Disallow preverb modifiers other than _ala_ + // Disallow preverb modifiers other than "ala" (preposition) => { if (!modifiersIsAlaOrNone(preposition.modifiers)) { throw new UnrecognizedError('preverb with modifiers other than "ala"'); @@ -219,22 +306,44 @@ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ } return true; }, + // "n" and multiple "a" cannot modify a preposition + (wordUnit) => { + if (isMultipleAOrN(wordUnit.emphasis)) { + throw new UnrecognizedError( + `${describe(wordUnit.emphasis!)} modifying a word`, + ); + } + return true; + }, + // Preposition with "anu" must not have emphasis particle + (preposition) => + preposition.emphasis == null || preposition.phrases.type !== "anu", + // Inner phrase must not have emphasis particle + (preposition) => + preposition.phrases.type !== "single" || + !phraseHasTopLevelEmphasis(preposition.phrases.phrase), ]; /** Array of filter rules for clauses. 
*/ export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ // disallow preposition in subject (clause) => { let phrases: MultiplePhrases; - if (clause.type === "phrases" || clause.type === "o vocative") { - phrases = clause.phrases; - } else if (clause.type === "li clause" || clause.type === "o clause") { - if (clause.subjects) { - phrases = clause.subjects; - } else { + switch (clause.type) { + case "phrases": + case "o vocative": + phrases = clause.phrases; + break; + case "li clause": + case "o clause": + if (clause.subjects) { + phrases = clause.subjects; + } else { + return true; + } + break; + case "prepositions": + case "quotation": return true; - } - } else { - return true; } if (somePhraseInMultiplePhrases(phrases, hasPrepositionInPhrase)) { throw new UnrecognizedError("Preposition in subject"); @@ -243,27 +352,129 @@ export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ }, // disallow preposition in object (clause) => { - if (clause.type === "li clause" || clause.type === "o clause") { + if ( + (clause.type === "li clause" || clause.type === "o clause") && + someObjectInMultiplePredicate(clause.predicates, hasPrepositionInPhrase) + ) { + throw new UnrecognizedError("Preposition in object"); + } + return true; + }, + // disallow "mi li" or "sina li" + (clause) => { + if ( + clause.type === "li clause" && + clause.explicitLi && + clause.subjects.type === "single" + ) { + const phrase = clause.subjects.phrase; if ( - someObjectInMultiplePredicate(clause.predicates, hasPrepositionInPhrase) + phrase.type === "default" && + phrase.headWord.type === "default" && + phrase.headWord.emphasis == null && + phrase.modifiers.length === 0 && + phrase.emphasis == null ) { - throw new UnrecognizedError("Preposition in object"); + const word = phrase.headWord.word; + if (word === "mi" || word === "sina") { + throw new UnrecognizedError(`"${word} li"`); + } } } return true; }, ]; export const FULL_CLAUSE_RULE: Array<(fullClase: FullClause) => boolean> = 
[ - // Prevent "taso ala taso" + // Prevent "taso ala taso" or "kin ala kin" (fullClause) => { - if (fullClause.taso && fullClause.taso.type === "x ala x") { - throw new UnrecognizedError('"taso ala taso"'); + if (fullClause.type === "default") { + if ( + fullClause.kinOrTaso != null && fullClause.kinOrTaso.type === "x ala x" + ) { + const word = fullClause.kinOrTaso.word; + throw new UnrecognizedError(`"${word} ala ${word}"`); + } + } + return true; + }, +]; +export const SENTENCE_RULE: Array<(sentence: Sentence) => boolean> = [ + // If there is "la", there must be no filler + (sentence) => { + if (sentence.laClauses.length > 0) { + for (const clause of [...sentence.laClauses, sentence.finalClause]) { + if (clause.type === "filler") { + throw new UnrecognizedError('filler with "la"'); + } + } + } + return true; + }, + // If there is "la", there can't be "taso" or "kin" + (sentence) => { + if (sentence.laClauses.length > 0) { + for (const clause of [...sentence.laClauses, sentence.finalClause]) { + if (clause.type === "default" && clause.kinOrTaso != null) { + throw new UnrecognizedError( + `${clause.kinOrTaso.word} particle with "la"`, + ); + } + } + } + return true; + }, + // Only the last clause can have anu seme + (sentence) => { + for (const clause of sentence.laClauses) { + if (clause.type === "default" && clause.anuSeme != null) { + throw new UnrecognizedError("anu seme inside sentence"); + } + } + return true; + }, + // Only the first clause can have starting particle + (sentence) => { + for ( + const clause of [...sentence.laClauses, sentence.finalClause].slice(1) + ) { + if (clause.type === "default" && clause.startingParticle != null) { + throw new UnrecognizedError("emphasis phrase inside sentence"); + } + } + return true; + }, + // Only the last clause can have ending particle + (sentence) => { + for (const clause of sentence.laClauses) { + if (clause.type === "default" && clause.endingParticle != null) { + throw new UnrecognizedError("emphasis phrase 
inside sentence"); + } + } + return true; + }, + // There can't be more than 1 "x ala x" or "seme" + (sentence) => { + if ( + sentence.interrogative != null && everyWordUnitInSentence(sentence) + .filter((wordUnit) => + wordUnit.type === "x ala x" || + ((wordUnit.type === "default" || + wordUnit.type === "reduplication") && + wordUnit.word === "seme") + ) + .length > 1 + ) { + throw new UnrecognizedError( + 'more than 1 interrogative elements: "x ala x" or "seme"', + ); } return true; }, ]; /** Array of filter rules for multiple sentences. */ -export const SENTENCES_RULE: Array<(sentences: Array) => boolean> = [ +export const MULTIPLE_SENTENCES_RULE: Array< + (sentences: Array) => boolean +> = [ // Only allow at most 2 sentences (sentences) => { if (sentences.length > 2) { @@ -278,17 +489,102 @@ export function filter( ): (value: T) => boolean { return (value) => rules.every((rule) => rule(value)); } +/** + * Helper function for checking whether some modifier passes the test + * function. + */ +export function someModifierInPhrase( + phrase: Phrase, + whenQuotation: boolean, + checker: (modifier: Modifier) => boolean, +): boolean { + switch (phrase.type) { + case "default": + return phrase.modifiers.some(checker); + case "preverb": + return phrase.modifiers.some(checker) || + someModifierInPhrase(phrase.phrase, whenQuotation, checker); + case "preposition": { + return phrase.modifiers.some(checker) || + someModifierInMultiplePhrases( + phrase.phrases, + whenQuotation, + checker, + ); + } + case "quotation": + return whenQuotation; + } +} +/** + * Helper function for checking whether some modifier passes the test + * function. 
+ */ +export function someModifierInMultiplePhrases( + phrases: MultiplePhrases, + whenQuotation: boolean, + checker: (modifier: Modifier) => boolean, +): boolean { + switch (phrases.type) { + case "single": + return someModifierInPhrase(phrases.phrase, whenQuotation, checker); + case "and conjunction": + case "anu": + return phrases.phrases + .some((phrases) => + someModifierInMultiplePhrases(phrases, whenQuotation, checker) + ); + } +} +/** + * Helper function for checking whether some phrase passes the test + * function. + */ +export function somePhraseInMultiplePhrases( + phrases: MultiplePhrases, + checker: (modifier: Phrase) => boolean, +): boolean { + switch (phrases.type) { + case "single": + return checker(phrases.phrase); + case "and conjunction": + case "anu": + return phrases.phrases + .some((phrases) => somePhraseInMultiplePhrases(phrases, checker)); + } +} +/** + * Helper function for checking whether some object phrase passes the test + * function. + */ +export function someObjectInMultiplePredicate( + predicate: MultiplePredicates, + checker: (object: Phrase) => boolean, +): boolean { + switch (predicate.type) { + case "single": + return false; + case "associated": + if (predicate.objects) { + return somePhraseInMultiplePhrases(predicate.objects, checker); + } else { + return false; + } + case "and conjunction": + case "anu": + return predicate.predicates + .some((predicates) => + someObjectInMultiplePredicate(predicates, checker) + ); + } +} /** Helper function for checking whether a modifier is numeric. */ function modifierIsNumeric(modifier: Modifier): boolean { - if (modifier.type === "default") { - const word = modifier.word; - return word.type === "numbers" || - (word.type === "default" && - (word.word === "wan" || word.word === "tu")); - } - return false; + return modifier.type === "default" && modifier.word.type === "number"; } -/** Helper function for checking if the modifiers is exactly just _ala_ or nothing. 
*/ +/** + * Helper function for checking if the modifiers is exactly just "ala" or nothing. + */ function modifiersIsAlaOrNone(modifiers: Array): boolean { if (modifiers.length > 1) { return false; @@ -299,16 +595,35 @@ function modifiersIsAlaOrNone(modifiers: Array): boolean { } return true; } +/** + * Helper function for determining whether the phrase has a preposition inside. + */ function hasPrepositionInPhrase(phrase: Phrase): boolean { - if (phrase.type === "default") { - return false; - } else if (phrase.type === "preposition") { - return true; - } else if (phrase.type === "preverb") { - return hasPrepositionInPhrase(phrase.phrase); - } else if (phrase.type === "quotation") { - return false; - } else { - throw new Error("unreachable"); + switch (phrase.type) { + case "default": + return false; + case "preposition": + return true; + case "preverb": + return hasPrepositionInPhrase(phrase.phrase); + case "quotation": + return false; + } +} +function isMultipleAOrN(emphasis: null | Emphasis): boolean { + return emphasis != null && + (emphasis.type === "multiple a" || + ((emphasis.type === "word" || + emphasis.type === "long word") && + emphasis.word === "n")); +} +function phraseHasTopLevelEmphasis(phrase: Phrase): boolean { + switch (phrase.type) { + case "default": + case "preverb": + case "preposition": + return phrase.emphasis != null; + case "quotation": + return false; } } diff --git a/src/lexer.ts b/src/lexer.ts new file mode 100644 index 0000000..a2c0911 --- /dev/null +++ b/src/lexer.ts @@ -0,0 +1,363 @@ +/** + * Module for lexer. It is responsible for turning string into array of token + * trees. It also latinizes UCSUR characters. + * + * Note: the words lexer and parser are used interchangeably since they both + * have the same capabilities. 
+ */ + +import { Output } from "./output.ts"; +import { UnexpectedError, UnrecognizedError } from "./error.ts"; +import { + allAtLeastOnce, + cached, + choiceOnlyOne, + count, + match, + optionalAll, + Parser, + sequence, +} from "./parser-lib.ts"; +import { Token } from "./token.ts"; +import { + END_OF_CARTOUCHE, + END_OF_LONG_GLYPH, + END_OF_REVERSE_LONG_GLYPH, + SCALING_JOINER, + STACKING_JOINER, + START_OF_CARTOUCHE, + START_OF_LONG_GLYPH, + START_OF_REVERSE_LONG_GLYPH, + UCSUR_TO_LATIN, +} from "./ucsur.ts"; + +/** parses space. */ +export function spaces(): Parser { + return match(/\s*/, "space").map(([space]) => space); +} +/** parses a string of consistent length. */ +function slice(length: number, description: string): Parser { + return new Parser((src) => { + if (src.length < length) { + return new Output(new UnexpectedError(src, description)); + } else { + return new Output([{ + rest: src.slice(length), + value: src.slice(0, length), + }]); + } + }); +} +/** Parses a string that exactly matches the given string. */ +function matchString(match: string): Parser { + return slice(match.length, `"${match}"`).map((slice) => { + if (slice === match) { + return match; + } else { + throw new UnexpectedError(`"${slice}"`, `"${match}"`); + } + }); +} +/** Parses lowercase latin word. */ +function latinWord(): Parser { + return match(/([a-z][a-zA-Z]*)\s*/, "word").map(([_, word]) => { + if (/[A-Z]/.test(word)) { + throw new UnrecognizedError(`"${word}"`); + } else { + return word; + } + }); +} +/** Parses variation selector. */ +function variationSelector(): Parser { + return match(/[\uFE00-\uFE0F]/, "variation selector") + .map(([character]) => character); +} +/** + * Parses an UCSUR character, this doesn't parse space and so must be manually + * added if needed. 
+ */ +function ucsur(): Parser { + return slice(2, "UCSUR character"); +} +/** + * Parses a specific UCSUR character, this doesn't parse space and so must be + * manually added if needed + */ +function specificUcsurCharacter( + character: string, + description: string, +): Parser { + return ucsur().filter((word) => { + if (word === character) { + return true; + } else { + throw new UnexpectedError(`"${word}"`, description); + } + }); +} +/** + * Parses UCSUR word, this doesn't parse space and so must be manually added if + * needed + */ +function ucsurWord(): Parser { + return ucsur().map((word) => { + const latin = UCSUR_TO_LATIN[word]; + if (latin == null) { + throw new UnexpectedError(word, "UCSUR glyph"); + } else { + return latin; + } + }); +} +/** Parses a single UCSUR word. */ +function singleUcsurWord(): Parser { + return ucsurWord().skip(optionalAll(variationSelector())).skip(spaces()); +} +/** Parses a joiner. */ +function joiner(): Parser { + return choiceOnlyOne( + match(/\u200D/, "zero width joiner").map((_) => "zero width joiner"), + specificUcsurCharacter(STACKING_JOINER, "stacking joiner"), + specificUcsurCharacter(SCALING_JOINER, "scaling joiner"), + ); +} +/** + * Parses combined glyphs. The spaces after aren't parsed and so must be + * manually added by the caller. + */ +function combinedGlyphs(): Parser> { + return sequence( + ucsurWord(), + allAtLeastOnce( + joiner().with(ucsurWord()), + ), + ) + .map(([first, rest]) => [first, ...rest]); +} +/** Parses a word, either UCSUR or latin. */ +function word(): Parser { + return choiceOnlyOne(latinWord(), singleUcsurWord()); +} +/** Parses proper words spanning multiple words. */ +function properWords(): Parser { + return allAtLeastOnce( + match(/([A-Z][a-zA-Z]*)\s*/, "proper word").map(([_, word]) => word), + ) + .map((array) => array.join(" ")); +} +/** Parses a specific word, either UCSUR or latin. 
*/ +function specificWord(thatWord: string): Parser { + return word().filter((thisWord) => { + if (thatWord === thisWord) return true; + else throw new UnexpectedError(`"${thisWord}"`, `"${thatWord}"`); + }); +} +/** Parses multiple a. */ +function multipleA(): Parser { + return sequence(specificWord("a"), allAtLeastOnce(specificWord("a"))) + .map(([a, as]) => [a, ...as].length); +} +/** Parses lengthened words. */ +function longWord(): Parser { + return match(/[an]/, 'long "a" or "n"') + .then(([word, _]) => + count(allAtLeastOnce(matchString(word))) + .map((count) => + ({ + type: "long word", + word, + length: count + 1, + }) as Token & { type: "long word" } + ) + ) + .skip(spaces()); +} +/** Parses X ala X constructions. */ +function xAlaX(): Parser { + return word().then((word) => + sequence(specificWord("ala"), specificWord(word)).map(() => word) + ); +} +/** Parses a punctuation. */ +function punctuation(): Parser { + return match(/([.,:;?!󱦜󱦝])\s*/u, "punctuation") + .map(([_, punctuation]) => punctuation); +} +/** Parses cartouche element and returns the phonemes or letters it represents. */ +function cartoucheElement(): Parser { + return choiceOnlyOne( + singleUcsurWord() + .skip( + match(/([\uff1a󱦝])\s*/u, "full width colon").map(([_, dot]) => dot), + ), + sequence( + singleUcsurWord(), + count( + allAtLeastOnce( + match(/([・。/󱦜])\s*/u, "full width dot").map(([_, dot]) => dot), + ), + ), + ) + .map(([word, dots]) => { + let count = dots; + if (/^[aeiou]/.test(word)) { + count++; + } + const morae = word.match(/[aeiou]|[jklmnpstw][aeiou]|n/g)!; + if (morae.length < count) { + throw new UnrecognizedError("Excess dots"); + } + return morae.slice(0, count).join(""); + }), + singleUcsurWord().map((word) => word[0]), + match(/([a-zA-Z]+)\s*/, "Latin letter") + .map(([_, letter]) => letter.toLowerCase()), + ); +} +/** Parses a single cartouche. 
*/ +function cartouche(): Parser { + return sequence( + specificUcsurCharacter(START_OF_CARTOUCHE, "start of cartouche") + .skip(spaces()), + allAtLeastOnce(cartoucheElement()), + specificUcsurCharacter(END_OF_CARTOUCHE, "end of cartouche").skip(spaces()), + ) + .map(([_, words, _1]) => { + const word = words.join(""); + return word[0].toUpperCase() + word.slice(1); + }); +} +/** Parses multiple cartouches. */ +function cartouches(): Parser { + return allAtLeastOnce(cartouche()).map((words) => words.join(" ")); +} +/** + * Parses long glyph container. + * + * spaces after the first glyph and the last glyph aren't parsed and so must be + * manually added by the caller if needed. + */ +function longContainer( + left: string, + right: string, + inside: Parser, +): Parser { + const description: { [character: string]: string } = { + [START_OF_LONG_GLYPH]: "start of long glyph", + [END_OF_LONG_GLYPH]: "end of long glyph", + [START_OF_REVERSE_LONG_GLYPH]: "start of reverse long glyph", + [END_OF_REVERSE_LONG_GLYPH]: "end of reverse long glyph", + }; + return sequence( + specificUcsurCharacter(left, description[left]), + inside, + specificUcsurCharacter(right, description[right]), + ) + .map(([_, inside, _1]) => inside); +} +/** Parses long glyph container containing just spaces. */ +function longSpaceContainer(): Parser { + return longContainer( + START_OF_LONG_GLYPH, + END_OF_LONG_GLYPH, + match(/\s+/, "space").map(([space]) => space.length), + ) + .skip(spaces()); +} +/** + * Parses long glyph head. + * + * This doesn't parses space on the right and so must be manually added by the + * caller if needed. + */ +function longGlyphHead(): Parser> { + return choiceOnlyOne( + combinedGlyphs(), + ucsurWord().map((word) => [word]), + ); +} +/** Parses long glyph that only contains spaces. 
*/ +function spaceLongGlyph(): Parser { + return sequence(longGlyphHead(), longSpaceContainer()) + .map(([words, spaceLength]) => ({ + type: "space long glyph", + words, + spaceLength, + })); +} +function headedLongGlyphStart(): Parser< + Token & { type: "headed long glyph start" } +> { + return longGlyphHead().skip( + specificUcsurCharacter(START_OF_LONG_GLYPH, "start of long glyph"), + ) + .skip(spaces()) + .map((words) => ({ type: "headed long glyph start", words })); +} +function headlessLongGlyphEnd(): Parser< + Token & { type: "headless long glyph end" } +> { + return specificUcsurCharacter(END_OF_LONG_GLYPH, "end of long glyph") + .skip(spaces()) + .map((_) => ({ type: "headless long glyph end" })); +} +function headlessLongGlyphStart(): Parser< + Token & { type: "headless long glyph end" } +> { + return specificUcsurCharacter( + START_OF_REVERSE_LONG_GLYPH, + "start of reverse long glyph", + ) + .skip(spaces()) + .map((_) => ({ type: "headless long glyph end" })); +} +function headedLongGlyphEnd(): Parser< + Token & { type: "headed long glyph start" } +> { + return specificUcsurCharacter( + END_OF_REVERSE_LONG_GLYPH, + "end of reverse long glyph", + ) + .with(longGlyphHead()) + .skip(spaces()) + .map((words) => ({ type: "headed long glyph start", words })); +} +function insideLongGlyph(): Parser< + Token & { type: "headed long glyph start" } +> { + return specificUcsurCharacter( + END_OF_REVERSE_LONG_GLYPH, + "end of reverse long glyph", + ) + .with(longGlyphHead()) + .skip(specificUcsurCharacter(START_OF_LONG_GLYPH, "start of long glyph")) + .skip(spaces()) + .map((words) => ({ type: "headed long glyph start", words })); +} +/** Parses a token. 
*/ +export const TOKEN = cached(choiceOnlyOne( + spaceLongGlyph(), + headedLongGlyphStart(), + combinedGlyphs() + .skip(spaces()) + .map((words) => ({ type: "combined glyphs", words }) as Token), + properWords().map((words) => + ({ type: "proper word", words, kind: "latin" }) as Token + ), + longWord(), + multipleA().map((count) => ({ type: "multiple a", count }) as Token), + xAlaX().map((word) => ({ type: "x ala x", word }) as Token), + word().map((word) => ({ type: "word", word }) as Token), + // starting with non-words: + punctuation().map((punctuation) => + ({ type: "punctuation", punctuation }) as Token + ), + headlessLongGlyphEnd(), + headedLongGlyphEnd(), + headlessLongGlyphStart(), + insideLongGlyph(), + cartouches().map((words) => + ({ type: "proper word", words, kind: "cartouche" }) as Token + ), +)); diff --git a/src/main.ts b/src/main.ts index eed60e7..2b0b579 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,53 +1,171 @@ -import { translate } from "./translator.ts"; +/** Module for main execution in the browser. */ + +import { translate } from "./composer.ts"; +import { shuffle } from "./misc.ts"; +import { settings } from "./settings.ts"; +import { errors } from "telo-misikeke/telo-misikeke.js"; // Set to false when releasing, set to true when developing const DEVELOPMENT = false; // Don't forget these two when releasing -const DATE_RELEASED = new Date("2024-2-1"); -const VERSION = "v0.2.2"; +const DATE_RELEASED = new Date("2024-8-15"); +const VERSION = "v0.3.0"; + +type Elements = { + input: HTMLTextAreaElement; + output: HTMLUListElement; + error: HTMLParagraphElement; + errorList: HTMLParagraphElement; + translateButton: HTMLButtonElement; + settingsButton: HTMLButtonElement; + dialogBox: HTMLDialogElement; + confirmButton: HTMLButtonElement; + cancelButton: HTMLButtonElement; + resetButton: HTMLButtonElement; + version: HTMLAnchorElement; +}; +/** A map of all HTML elements that are used here. 
*/ +let elements: undefined | Elements; -// TODO: maybe use worker -document.addEventListener("DOMContentLoaded", () => { - const input = document.getElementById("input") as HTMLTextAreaElement; - const output = document.getElementById("output") as HTMLUListElement; - const error = document.getElementById("error") as HTMLParagraphElement; - const button = document.getElementById( - "translate-button", - ) as HTMLButtonElement; - const version = document.getElementById("version") as HTMLAnchorElement; +function loadElements(): void { + const elementNames = { + input: "input", + output: "output", + error: "error", + errorList: "error-list", + translateButton: "translate-button", + settingsButton: "settings-button", + dialogBox: "dialog-box", + confirmButton: "confirm-button", + cancelButton: "cancel-button", + resetButton: "reset-button", + version: "version", + // deno-lint-ignore no-explicit-any + } as any; + for (const name of Object.keys(elementNames)) { + elementNames[name] = document.getElementById(elementNames[name]); + } + elements = elementNames; +} +function setVersion(): void { if (DEVELOPMENT) { - version.innerText = `${VERSION} (On development)`; + elements!.version.innerText = `${VERSION} (On development)`; } else { const date = DATE_RELEASED.toLocaleDateString(undefined, { dateStyle: "short", }); - version.innerText = `${VERSION} - Released ${date}`; + elements!.version.innerText = `${VERSION} - Released ${date}`; + } +} +function clearOutput(): void { + elements!.output.innerHTML = ""; + elements!.errorList.innerHTML = ""; + elements!.error.innerText = ""; +} +function outputTranslations(output: Array): void { + for (const translation of output) { + const list = document.createElement("li"); + list.innerHTML = translation; + elements!.output.appendChild(list); + } +} +function outputErrors(errors: Array, asHtml: boolean): void { + let property: "innerText" | "innerHTML"; + if (asHtml) { + property = "innerHTML"; + } else { + property = "innerText"; } 
- const listener = () => { - while (output.children.length > 0) { - output.removeChild(output.children[0]); + if (errors.length === 0) { + elements!.error.innerText = + "An unknown error has occurred (Errors should be known, please report this)"; + } else if (errors.length === 1) { + elements!.error.innerText = "An error has been found:"; + const list = document.createElement("li"); + list[property] = errors[0]; + elements!.errorList.appendChild(list); + } else { + elements!.error.innerText = "Multiple errors has been found:"; + for (const errorMessage of errors) { + const list = document.createElement("li"); + list[property] = errorMessage; + elements!.errorList.appendChild(list); } - error.innerText = ""; - const translations = translate(input.value); - if (translations.isError()) { - error.innerText = translations.error?.message ?? "No error provided"; + } +} +function updateOutput(): void { + clearOutput(); + const source = elements!.input.value; + try { + const translations = translate(source); + if (!translations.isError()) { + const output = [...new Set(translations.output)]; + if (settings.get("randomize")) { + shuffle(output); + } + outputTranslations(output); } else { - const set = new Set(); - for (const translation of translations.output) { - if (!set.has(translation)) { - const list = document.createElement("li"); - list.innerText = translation; - output.appendChild(list); - set.add(translation); - } + let asHtml = true; + let error: Array = []; + if (settings.get("use-telo-misikeke")) { + error = errors(source); + } + if (error.length === 0) { + error = [ + ...new Set( + translations.errors.map((x) => x.message), + ), + ]; + asHtml = false; } + outputErrors(error, asHtml); } - }; - button.addEventListener("click", listener); - input.addEventListener("keydown", (event) => { - if (event.code === "Enter") { - listener(); - event.preventDefault(); + } catch (unreachableError) { + let error: string; + if (unreachableError instanceof Error) { + error = 
unreachableError.message; + } else { + error = `${unreachableError}`; + } + error += " (please report this)"; + outputErrors([error], false); + throw unreachableError; + } +} +if (typeof document !== "undefined") { + document.addEventListener("DOMContentLoaded", () => { + loadElements(); + settings.loadFromLocalStorage(); + setVersion(); + // Auto resize + function resizeTextarea() { + elements!.input.style.height = "auto"; + elements!.input.style.height = `${ + Math.max(50, elements!.input.scrollHeight + 20) + }px`; } + resizeTextarea(); + elements!.input.addEventListener("input", resizeTextarea); + elements!.settingsButton.addEventListener("click", () => { + elements!.dialogBox.showModal(); + }); + elements!.confirmButton.addEventListener("click", () => { + settings.loadFromElements(); + elements!.dialogBox.close(); + }); + elements!.cancelButton.addEventListener("click", () => { + settings.resetElementsToCurrent(); + elements!.dialogBox.close(); + }); + elements!.resetButton.addEventListener("click", () => { + settings.resetElementsToDefault(); + }); + elements!.translateButton.addEventListener("click", updateOutput); + elements!.input.addEventListener("keydown", (event) => { + if (event.code === "Enter") { + event.preventDefault(); + updateOutput(); + } + }); }); -}); +} diff --git a/src/misc.ts b/src/misc.ts new file mode 100644 index 0000000..9b42f89 --- /dev/null +++ b/src/misc.ts @@ -0,0 +1,32 @@ +export function nullableAsArray( + value?: T | null | undefined, +): Array> { + if (value == null) { + return []; + } else { + return [value]; + } +} +export function repeat(text: string, count: number): string { + return new Array(count).fill(text).join(""); +} +export function repeatWithSpace(text: string, count: number): string { + return new Array(count).fill(text).join(" "); +} +// https://stackoverflow.com/questions/2450954/how-to-randomize-shuffle-a-javascript-array +export function shuffle(array: Array) { + let currentIndex = array.length; + + // While 
there remain elements to shuffle... + while (currentIndex != 0) { + // Pick a remaining element... + const randomIndex = Math.floor(Math.random() * currentIndex); + currentIndex--; + + // And swap it with the current element. + [array[currentIndex], array[randomIndex]] = [ + array[randomIndex], + array[currentIndex], + ]; + } +} diff --git a/src/output.ts b/src/output.ts index f725ae0..6f31c6a 100644 --- a/src/output.ts +++ b/src/output.ts @@ -1,64 +1,94 @@ -import { OutputError } from "./error.ts"; +/** Module containing the Output data type. */ + +/** Represents Error used by `Output`. */ +export class OutputError extends Error {} /** Represents possibilities and error. */ export class Output { /** Represents possibilities, considered error when the array is empty. */ - output: Array; - /** - * An optional error, should be supplied if and only if the array is empty. - */ - error: null | OutputError; + readonly output: Array; + /** A list of all aggregated errors. */ + readonly errors: Array = []; constructor(output?: undefined | null | Array | OutputError) { if (Array.isArray(output)) { this.output = output; - if (output.length === 0) { - this.error = new OutputError("no error provided"); - } else this.error = null; } else if (output instanceof OutputError) { this.output = []; - this.error = output; + this.errors.push(output); } else { this.output = []; - this.error = new OutputError(); } } - private setError(error: OutputError) { - if (this.output.length === 0 && !this.error) this.error = error; + private static newErrors(errors: Array): Output { + const output = new Output(); + for (const error of errors) { + output.pushError(error); + } + return output; + } + private pushError(error: OutputError): void { + if (this.isError()) { + this.errors.push(error); + } } private push(value: T): void { this.output.push(value); - this.error = null; + this.errors.length = 0; } - private append({ output, error }: Output): void { - this.output = [...this.output, ...output]; - 
if (this.output.length > 0) this.error = null; - else this.error = error; + private append(output: Output): void { + for (const item of output.output) { + this.push(item); + } + if (this.isError() && output.isError()) { + for (const item of output.errors) { + this.pushError(item); + } + } } /** Returns true when the output array is empty */ isError(): boolean { return this.output.length === 0; } + /** Filters outputs. For convenience, the mapper function can throw + * OutputError; Other kinds of errors will be ignored. + */ filter(mapper: (value: T) => boolean): Output { - return this.map((value) => { - if (mapper(value)) { - return value; - } else { - throw new OutputError("no error provided"); + if (this.isError()) { + return Output.newErrors(this.errors); + } + const wholeOutput = new Output(); + for (const value of this.output) { + try { + if (mapper(value)) { + wholeOutput.push(value); + } + } catch (error) { + if (error instanceof OutputError) { + wholeOutput.pushError(error); + } else { + throw error; + } } - }); + } + return wholeOutput; } /** * Maps all values and returns new Output. For convenience, the mapper * function can throw OutputError; Other kinds of errors will be ignored. */ map(mapper: (value: T) => U): Output { - if (this.isError()) return new Output(this.error); + if (this.isError()) { + return Output.newErrors(this.errors); + } const wholeOutput = new Output(); for (const value of this.output) { try { wholeOutput.push(mapper(value)); } catch (error) { - if (error instanceof OutputError) this.setError(error); - else throw error; + if (error instanceof OutputError) { + wholeOutput.pushError(error); + } else { + throw error; + } } } return wholeOutput; @@ -68,11 +98,45 @@ export class Output { * values and flattens them into single array for Output. 
*/ flatMap(mapper: (value: T) => Output): Output { - if (this.isError()) return new Output(this.error); + if (this.isError()) { + return Output.newErrors(this.errors); + } const wholeOutput = new Output(); for (const value of this.output) wholeOutput.append(mapper(value)); return wholeOutput; } + filterMap(mapper: (value: T) => U): Output> { + if (this.isError()) { + return Output.newErrors(this.errors); + } + const wholeOutput = new Output>(); + for (const value of this.output) { + try { + const newValue = mapper(value); + if (newValue != null) { + wholeOutput.push(newValue); + } + } catch (error) { + if (error instanceof OutputError) { + wholeOutput.pushError(error); + } else { + throw error; + } + } + } + return wholeOutput; + } + sort(comparer: (left: T, right: T) => number): Output { + if (this.isError()) { + return Output.newErrors(this.errors); + } else { + return new Output(this.output.slice().sort(comparer)); + } + } + sortBy(mapper: (value: T) => number): Output { + return this.sort((left, right) => mapper(left) - mapper(right)); + } + /** Combines all outputs. */ static concat(...outputs: Array>): Output { const wholeOutput = new Output(); for (const output of outputs) { @@ -80,4 +144,30 @@ export class Output { } return wholeOutput; } + /** + * Combines all permutations of all Outputs into an Output of a single tuple + * or array. If some of the Output is an error, all errors are aggregated. 
+ */ + static combine>( + ...outputs: { [I in keyof T]: Output } & { length: T["length"] } + ): Output { + // We resorted to using `any` types here, make sure it works properly + return outputs.reduce( + // deno-lint-ignore no-explicit-any + (output: Output, newOutput) => { + if (output.isError() && newOutput.isError()) { + return Output.concat(output, newOutput); + } else if (output.isError()) { + return Output.newErrors(output.errors); + } else if (newOutput.isError()) { + return Output.newErrors(newOutput.errors); + } else { + return output + .flatMap((left) => newOutput.map((right) => [...left, right])); + } + }, + // deno-lint-ignore no-explicit-any + new Output([[]]), + ) as Output; + } } diff --git a/src/parser-lib.ts b/src/parser-lib.ts new file mode 100644 index 0000000..b21c5b5 --- /dev/null +++ b/src/parser-lib.ts @@ -0,0 +1,242 @@ +/** + * A generic module for parser and parser combinator. It is used by both lexer + * and AST parser. + */ + +import { UnexpectedError } from "./error.ts"; +import { Output, OutputError } from "./output.ts"; + +/** A single parsing result. */ +export type ValueRest = { rest: string; value: T }; +/** A special kind of Output that parsers returns. */ +export type ParserOutput = Output>; + +/** Wrapper of parser function with added methods for convenience. */ +export class Parser { + constructor(public readonly parser: (src: string) => ParserOutput) {} + /** + * Maps the parsing result. For convenience, the mapper function can throw + * an OutputError; Other kinds of error are ignored. + */ + map(mapper: (value: T) => U): Parser { + return new Parser((src) => + this + .parser(src) + .map(({ value, rest }) => ({ value: mapper(value), rest })) + ); + } + /** + * Filters outputs. Instead of returning false, OutputError must be thrown + * instead. 
+ */ + filter(mapper: (value: T) => boolean): Parser { + return new Parser((src) => + this.parser(src).filter(({ value }) => mapper(value)) + ); + } + /** + * Parses `this` then passes the parsing result in the mapper. The resulting + * parser is then also parsed. + */ + then(mapper: (value: T) => Parser): Parser { + return new Parser((src) => + this.parser(src).flatMap(({ value, rest }) => mapper(value).parser(rest)) + ); + } + sort(comparer: (left: T, right: T) => number): Parser { + return new Parser((src) => + this.parser(src).sort((left, right) => comparer(left.value, right.value)) + ); + } + sortBy(mapper: (value: T) => number): Parser { + return this.sort((left, right) => mapper(left) - mapper(right)); + } + /** Takes another parser and discards the parsing result of `this`. */ + with(parser: Parser): Parser { + return sequence(this, parser).map(([_, output]) => output); + } + /** Takes another parser and discards its parsing result. */ + skip(parser: Parser): Parser { + return sequence(this, parser).map(([output, _]) => output); + } + parse(src: string): Output { + return this.parser(src).map(({ value }) => value); + } +} +/** Parser that always outputs an error. */ +export function error(error: OutputError): Parser { + return new Parser(() => new Output(error)); +} +/** Parser that always outputs an empty output. */ +export function empty(): Parser { + return new Parser(() => new Output()); +} +/** Parses nothing and leaves the source string intact. */ +export function nothing(): Parser { + return new Parser((src) => new Output([{ value: null, rest: src }])); +} +/** Parses without consuming the source string */ +export function lookAhead(parser: Parser): Parser { + return new Parser((src) => + parser.parser(src).map(({ value }) => ({ value, rest: src })) + ); +} +/** + * Lazily evaluates the parser function only when needed. Useful for recursive + * parsers. 
+ */ +export function lazy(parser: () => Parser): Parser { + return new Parser((src) => parser().parser(src)); +} +/** + * Evaluates all parsers on the same source string and sums it all on a single + * Output. + */ +export function choice(...choices: Array>): Parser { + return new Parser((src) => + new Output(choices).flatMap((parser) => parser.parser(src)) + ); +} +/** + * Tries to evaluate each parsers one at a time and only only use the output of + * the parser that is successful. + */ +export function choiceOnlyOne( + ...choices: Array> +): Parser { + return choices.reduceRight((newParser, parser) => + new Parser((src) => { + const output = parser.parser(src); + if (output.isError()) { + return Output.concat(output, newParser.parser(src)); + } else { + return output; + } + }), empty()); +} +/** Combines `parser` and the `nothing` parser, and output `null | T`. */ +export function optional(parser: Parser): Parser { + return choice(parser, nothing()); +} +/** + * Like `optional` but when the parser is successful, it doesn't consider + * parsing nothing. + */ +export function optionalAll(parser: Parser): Parser { + return choiceOnlyOne(parser, nothing()); +} +/** Takes all parsers and applies them one after another. */ +export function sequence>( + ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } +): Parser { + // We resorted to using `any` types here, make sure it works properly + return sequence.reduceRight( + // deno-lint-ignore no-explicit-any + (newParser: Parser, parser) => + parser.then((value) => newParser.map((newValue) => [value, ...newValue])), + nothing().map(() => []), + // deno-lint-ignore no-explicit-any + ) as Parser; +} +/** + * Parses `parser` multiple times and returns an `Array`. The resulting + * output includes all outputs from parsing nothing to parsing as many as + * possible. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. 
+ */ +export function many(parser: Parser): Parser> { + return choice( + sequence(parser, lazy(() => many(parser))) + .map(([first, rest]) => [first, ...rest]), + nothing().map(() => []), + ); +} +/** + * Like `many` but parses at least once. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. + */ +export function manyAtLeastOnce(parser: Parser): Parser> { + return sequence(parser, many(parser)) + .map(([first, rest]) => [first, ...rest]); +} +/** + * Parses `parser` multiple times and returns an `Array`. This function is + * exhaustive unlike `many`. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. + */ +export function all(parser: Parser): Parser> { + return choiceOnlyOne( + sequence(parser, lazy(() => all(parser))) + .map(([first, rest]) => [first, ...rest]), + nothing().map(() => []), + ); +} +/** + * Like `all` but parses at least once. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. + */ +export function allAtLeastOnce(parser: Parser): Parser> { + return sequence(parser, all(parser)) + .map(([first, rest]) => [first, ...rest]); +} +export function count(parser: Parser>): Parser { + return parser.map((array) => array.length); +} +/** + * Uses Regular Expression to create parser. The parser outputs + * RegExpMatchArray, which is what `string.match( ... )` returns. 
+ */ +export function match( + regex: RegExp, + description: string, +): Parser { + const newRegex = new RegExp(`^${regex.source}`, regex.flags); + return new Parser((src) => { + const match = src.match(newRegex); + if (match != null) { + return new Output([{ value: match, rest: src.slice(match[0].length) }]); + } else if (src === "") { + return new Output(new UnexpectedError("end of text", description)); + } else { + const token = src.match(/[^\s]*/)![0]; + let tokenDescription: string; + if (token === "") { + tokenDescription = "space"; + } else { + tokenDescription = `"${token}"`; + } + return new Output(new UnexpectedError(tokenDescription, description)); + } + }); +} +/** Parses the end of line (or the end of sentence in context of Toki Pona) */ +export function eol(): Parser { + return new Parser((src) => { + if (src === "") return new Output([{ value: null, rest: "" }]); + else return new Output(new UnexpectedError(`"${src}"`, "end of text")); + }); +} +export function cached(parser: Parser): Parser { + const cache: { [word: string]: ParserOutput } = {}; + return new Parser((src) => { + if (Object.hasOwn(cache, src)) { + return cache[src]; + } else { + const output = parser.parser(src); + cache[src] = output; + return output; + } + }); +} diff --git a/src/parser.ts b/src/parser.ts deleted file mode 100644 index 14df87a..0000000 --- a/src/parser.ts +++ /dev/null @@ -1,656 +0,0 @@ -import { - Clause, - FullClause, - Modifier, - MultiplePhrases, - MultiplePredicates, - Phrase, - Preposition, - Quotation, - Sentence, - WordUnit, -} from "./ast.ts"; -import { UnreachableError, UnrecognizedError } from "./error.ts"; -import { Output } from "./output.ts"; -import { - CONTENT_WORD, - PREPOSITION, - PREVERB, - SPECIAL_SUBJECT, -} from "./vocabulary.ts"; -import { - CLAUSE_RULE, - filter, - FULL_CLAUSE_RULE, - MODIFIER_RULES, - MODIFIERS_RULES, - PHRASE_RULE, - PREPOSITION_RULE, - SENTENCES_RULE, - WORD_UNIT_RULES, -} from "./filter.ts"; - -/** A single parsing 
result. */ -type ValueRest = { value: T; rest: string }; -/** A special kind of Output that parsers returns. */ -type ParserOutput = Output>; - -/** Wrapper of parser function with added methods for convenience. */ -class Parser { - constructor(public readonly parser: (src: string) => ParserOutput) {} - /** - * Maps the parsing result. For convenience, the mapper function can throw - * an OutputError; Other kinds of error are ignored. - */ - map(mapper: (value: T) => U): Parser { - return new Parser((src) => - this.parser(src).map(({ value, rest }) => ({ - value: mapper(value), - rest, - })) - ); - } - /** - * Filters outputs. The mapper may throw OutputError as well in place of - * returning false. - */ - filter(mapper: (value: T) => boolean): Parser { - return new Parser((src) => - this.parser(src).filter(({ value }) => mapper(value)) - ); - } - /** - * Parses `this` then passes the parsing result in the mapper. The resulting - * parser is then also parsed. - */ - then(mapper: (value: T) => Parser): Parser { - return new Parser((src) => - this.parser(src).flatMap(({ value, rest }) => mapper(value).parser(rest)) - ); - } - /** Takes another parser and discards the parsing result of `this`. */ - with(parser: Parser): Parser { - return sequence(this, parser).map(([_, output]) => output); - } - /** Takes another parser and discards its parsing result. */ - skip(parser: Parser): Parser { - return sequence(this, parser).map(([output, _]) => output); - } -} -/** - * Uses Regular Expression to create parser. The parser outputs - * RegExpMatchArray, which is what `string.match( ... )` returns. 
- */ -function match(regex: RegExp): Parser { - const newRegex = new RegExp("^" + regex.source, regex.flags); - return new Parser((src) => { - const match = src.match(newRegex); - if (match) { - return new Output([{ value: match, rest: src.slice(match[0].length) }]); - } else if (src === "") { - return new Output(new UnrecognizedError("Unexpected end of sentence")); - } else { - const token = src.match(/(.*)(?:\s|$)/)?.[1]; - if (token) return new Output(new UnrecognizedError(`"${token}"`)); - else return new Output(new UnreachableError()); - } - }); -} -/** Parses nothing and leaves the source string intact. */ -function nothing(): Parser { - return new Parser((src) => new Output([{ value: null, rest: src }])); -} -/** Parses the end of line (or the end of sentence in context of Toki Pona) */ -function eol(): Parser { - return new Parser((src) => { - if (src === "") return new Output([{ value: null, rest: "" }]); - else return new Output(new UnrecognizedError(`"${src}"`)); - }); -} -/** Parses without consuming the source string */ -function lookAhead(parser: Parser): Parser { - return new Parser((src) => - parser.parser(src).map(({ value }) => ({ value, rest: src })) - ); -} -/** - * Lazily evaluates the parser function only when needed. Useful for recursive - * parsers. - */ -function lazy(parser: () => Parser): Parser { - return new Parser((src) => parser().parser(src)); -} -/** - * Evaluates all parsers on the same source string and sums it all on a single - * Output. - */ -function choice(...choices: Array>): Parser { - return new Parser((src) => - new Output(choices).flatMap((parser) => parser.parser(src)) - ); -} -/** - * Tries to evaluate each parsers one at a time and only returns the first - * Output without error. 
- */ -function choiceOnlyOne(...choices: Array>): Parser { - return new Parser((src) => - choices.reduce((output, parser) => { - if (output.isError()) return parser.parser(src); - else return output; - }, new Output>()) - ); -} -/** Combines `parser` and the `nothing` parser, and output `null | T`. */ -function optional(parser: Parser): Parser { - return choice(parser, nothing()); -} -/** Takes all parsers and applies them one after another. */ -function sequence>( - ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } -): Parser { - // We resorted to using `any` types here, make sure it works properly - return new Parser((src) => - sequence.reduce( - (output, parser) => - output.flatMap(({ value, rest }) => - parser.parser(rest).map(({ value: newValue, rest }) => ({ - value: [...value, newValue], - rest, - })) - ), - // deno-lint-ignore no-explicit-any - new Output>([{ value: [], rest: src }]), - ) - ); -} -/** - * Parses `parser` multiple times and returns an `Array`. The resulting - * output includes all outputs from parsing nothing to parsing as many as - * possible. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. - */ -function many(parser: Parser): Parser> { - return choice( - sequence(parser, lazy(() => many(parser))).map(( - [first, rest], - ) => [first, ...rest]), - nothing().map(() => []), - ); -} -/** - * Like `many` but parses at least once. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. - */ -function manyAtLeastOnce(parser: Parser): Parser> { - return sequence(parser, many(parser)).map(( - [first, rest], - ) => [first, ...rest]); -} -/** - * Parses `parser` multiple times and returns an `Array`. This function is - * exhaustive unlike `many`. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. 
- */ -function all(parser: Parser): Parser> { - return choiceOnlyOne( - sequence(parser, lazy(() => all(parser))).map(( - [first, rest], - ) => [first, ...rest]), - nothing().map(() => []), - ); -} -/** - * Like `all` but parses at least once. - * - * ## ⚠️ Warning - * - * Will cause infinite recursion if the parser can parse nothing. - */ -function allAtLeastOnce(parser: Parser): Parser> { - return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); -} -/** Parses comma. */ -function comma(): Parser { - return match(/,\s*/).map(() => ","); -} -/** Parses an optional comma. */ -function optionalComma(): Parser { - return optional(comma()); -} -/** Parses lowercase word. */ -function word(): Parser { - return match(/([a-z]+)\s*/).map(([_, word]) => word); -} -/** - * Parses all at least one uppercase words and combines them all into single - * string. This function is exhaustive like `all`. - */ -function properWords(): Parser { - return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map( - (array) => array.join(" "), - ); -} -/** Parses word only from `set`. */ -function wordFrom(set: Set, description: string): Parser { - return word().filter((word) => { - if (set.has(word)) return true; - else throw new UnrecognizedError(`"${word}" as ${description}`); - }); -} -/** Parses a specific word. */ -function specificWord(thatWord: string): Parser { - return word().filter((thisWord) => { - if (thatWord === thisWord) return true; - else throw new UnrecognizedError(`"${thisWord}" instead of "${thatWord}"`); - }); -} -/** Parses word unit without numbers. 
*/ -function wordUnit(word: Parser): Parser { - return choice( - word.then((word) => - manyAtLeastOnce(specificWord(word)).map((words) => ({ - type: "reduplication", - word, - count: words.length + 1, - } as WordUnit)) - ), - word.then((word) => specificWord("ala").with(specificWord(word))).map(( - word, - ) => ({ type: "x ala x", word } as WordUnit)), - word.map((word) => ({ type: "default", word } as WordUnit)), - ).filter(filter(WORD_UNIT_RULES)); -} -/** Parses number words in order. */ -function number(): Parser> { - return sequence( - many(choice(specificWord("ale"), specificWord("ali"))), - many(specificWord("mute")), - many(specificWord("luka")), - many(specificWord("tu")), - many(specificWord("wan")), - ).map((array) => { - const output = array.flat(); - if (output.length >= 2) return output; - else throw new UnreachableError(); - }); -} -/** Parses multiple modifiers */ -function modifiers(): Parser> { - return sequence( - many( - choice( - wordUnit(wordFrom(CONTENT_WORD, "modifier")).map((word) => ({ - type: "default", - word, - } as Modifier)).filter(filter(MODIFIER_RULES)), - properWords().map(( - words, - ) => ({ type: "proper words", words } as Modifier)).filter( - filter(MODIFIER_RULES), - ), - number().map(( - numbers, - ) => ({ - type: "default", - word: { type: "numbers", numbers }, - } as Modifier)).filter(filter(MODIFIER_RULES)), - quotation().map(( - quotation, - ) => ({ type: "quotation", quotation } as Modifier)).filter( - filter(MODIFIER_RULES), - ), - ), - ), - many( - sequence(wordUnit(specificWord("nanpa")), phrase()).map(( - [nanpa, phrase], - ) => ({ - type: "nanpa", - nanpa, - phrase, - } as Modifier)).filter(filter(MODIFIER_RULES)), - ), - many( - specificWord("pi").with(phrase()).map((phrase) => ({ - type: "pi", - phrase, - } as Modifier)).filter(filter(MODIFIER_RULES)), - ), - ).map(( - [modifiers, nanpaModifiers, piModifiers], - ) => [...modifiers, ...nanpaModifiers, ...piModifiers]).filter( - filter(MODIFIERS_RULES), - ); -} -/** 
Parses phrases including preverbial phrases. */ -function phrase(): Parser { - return choice( - sequence(number(), lazy(modifiers)).map(( - [numbers, modifiers], - ) => ({ - type: "default", - headWord: { type: "numbers", numbers }, - modifiers, - } as Phrase)), - sequence( - wordUnit(wordFrom(PREVERB, "preverb")), - lazy(modifiers), - lazy(phrase), - ).map(( - [preverb, modifiers, phrase], - ) => ({ - type: "preverb", - preverb, - modifiers, - phrase, - } as Phrase)), - lazy(preposition).map((preposition) => ({ - type: "preposition", - preposition, - } as Phrase)), - sequence( - wordUnit(wordFrom(CONTENT_WORD, "headword")), - lazy(modifiers), - ).map(([headWord, modifiers]) => ({ - type: "default", - headWord, - modifiers, - } as Phrase)), - quotation().map(( - quotation, - ) => ({ type: "quotation", quotation } as Phrase)), - ).filter(filter(PHRASE_RULE)); -} -/** - * Parses nested phrases with given nesting rule, only accepting the top level - * operation. - */ -function nestedPhrasesOnly( - nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, -): Parser { - if (nestingRule.length === 0) { - return phrase().map( - (phrase) => ({ type: "single", phrase } as MultiplePhrases), - ); - } else { - const [first, ...rest] = nestingRule; - let type: "and conjunction" | "anu"; - if (["en", "li", "o", "e"].indexOf(first) !== -1) { - type = "and conjunction"; - } else { - type = "anu"; - } - return sequence( - nestedPhrases(rest), - manyAtLeastOnce( - optionalComma().with(specificWord(first)).with( - nestedPhrases(rest), - ), - ), - ).map(([group, moreGroups]) => ({ - type, - phrases: [group, ...moreGroups], - })); - } -} -/** Parses nested phrases with given nesting rule. 
*/ -function nestedPhrases( - nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, -): Parser { - if (nestingRule.length === 0) { - return phrase().map( - (phrase) => ({ type: "single", phrase } as MultiplePhrases), - ); - } else { - return choice( - nestedPhrasesOnly(nestingRule), - nestedPhrases(nestingRule.slice(1)), - ); - } -} -/** Parses phrases separated by _en_ or _anu_. */ -function subjectPhrases(): Parser { - return choice( - nestedPhrasesOnly(["en", "anu"]), - nestedPhrasesOnly(["anu", "en"]), - phrase().map((phrase) => ({ type: "single", phrase })), - ); -} -/** Parses prepositional phrase. */ -function preposition(): Parser { - return sequence( - wordUnit(wordFrom(PREPOSITION, "preposition")), - modifiers(), - nestedPhrases(["anu"]), - ).map(([preposition, modifiers, phrases]) => ({ - preposition, - modifiers, - phrases, - })).filter(filter(PREPOSITION_RULE)); -} -/** - * Parses associated predicates whose predicates only uses top level operator. - */ -function associatedPredicates( - nestingRule: Array<"li" | "o" | "anu">, -): Parser { - return sequence( - nestedPhrasesOnly(nestingRule), - optional( - optionalComma().with(specificWord("e")).with( - nestedPhrases(["e", "anu"]), - ), - ), - many(optionalComma().with(preposition())), - ).map(([predicates, objects, prepositions]) => { - if (!objects && prepositions.length === 0) { - throw new UnreachableError(); - } else { - return { - type: "associated", - predicates, - objects, - prepositions, - }; - } - }); -} -/** Parses multiple predicates without _li_ nor _o_ at the beginning. 
*/ -function multiplePredicates( - nestingRule: Array<"li" | "o" | "anu">, -): Parser { - if (nestingRule.length === 0) { - return choice( - associatedPredicates([]), - phrase().map(( - predicate, - ) => ({ type: "single", predicate } as MultiplePredicates)), - ); - } else { - const [first, ...rest] = nestingRule; - let type: "and conjunction" | "anu"; - if (first === "li" || first === "o") { - type = "and conjunction"; - } else { - type = "anu"; - } - return choice( - associatedPredicates(nestingRule), - sequence( - choice( - associatedPredicates(nestingRule), - multiplePredicates(rest), - ), - manyAtLeastOnce( - optionalComma().with(specificWord(first)).with( - choice( - associatedPredicates(nestingRule), - multiplePredicates(rest), - ), - ), - ), - ).map(([group, moreGroups]) => ({ - type, - predicates: [group, ...moreGroups], - } as MultiplePredicates)), - multiplePredicates(rest), - ); - } -} -/** Parses a single clause. */ -function clause(): Parser { - return choice( - sequence( - wordFrom(SPECIAL_SUBJECT, "mi/sina subject"), - multiplePredicates(["li", "anu"]), - ).map(([subject, predicates]) => ({ - type: "li clause", - subjects: { - type: "single", - phrase: { - type: "default", - headWord: { type: "default", word: subject }, - alaQuestion: false, - modifiers: [], - }, - }, - predicates, - } as Clause)), - sequence( - preposition(), - many(optionalComma().with(preposition())), - ).map(([preposition, morePreposition]) => ({ - type: "prepositions", - prepositions: [preposition, ...morePreposition], - } as Clause)), - subjectPhrases().map((phrases) => { - if (phrases.type === "single" && phrases.phrase.type === "quotation") { - throw new UnreachableError(); - } else { - return { type: "phrases", phrases } as Clause; - } - }), - subjectPhrases().skip(specificWord("o")).map((phrases) => ({ - type: "o vocative", - phrases, - } as Clause)), - sequence( - subjectPhrases(), - optionalComma().with(specificWord("li")).with( - multiplePredicates(["li", "anu"]), - ), 
- ).map(([subjects, predicates]) => ({ - type: "li clause", - subjects, - predicates, - } as Clause)), - sequence( - specificWord("o").with(multiplePredicates(["o", "anu"])), - ).map(([predicates]) => ({ - type: "o clause", - subjects: null, - predicates, - } as Clause)), - sequence( - subjectPhrases(), - optionalComma().with(specificWord("o")).with( - multiplePredicates(["o", "anu"]), - ), - ).map(([subjects, predicates]) => ({ - type: "o clause", - subjects: subjects, - predicates, - } as Clause)), - quotation().map((quotation) => ({ - type: "quotation", - quotation, - } as Clause)), - ).filter(filter(CLAUSE_RULE)); -} -/** Parses a single clause including precaluse and postclause. */ -function fullClause(): Parser { - return sequence( - optional(wordUnit(specificWord("taso")).skip(optionalComma())), - clause(), - optional( - optionalComma().with(specificWord("anu")).with( - wordUnit(specificWord("seme")), - ), - ), - ).map(([taso, clause, anuSeme]) => ({ - taso, - anuSeme, - clause, - })).filter(filter(FULL_CLAUSE_RULE)); -} -/** parses _la_ with optional comma around. */ -function la(): Parser { - return choice( - comma().with(specificWord("la")), - specificWord("la").skip(comma()), - specificWord("la"), - ); -} -/** Parses a single full sentence with optional punctuations. 
*/ -function sentence(): Parser { - return sequence( - fullClause(), - many(la().with(fullClause())), - choice( - eol().map(() => ""), - lookAhead(closeQuotationMark()).map(() => ""), - match(/([.,:;?!])\s*/).map(([_, punctuation]) => punctuation), - ), - ).map(([clause, moreClauses, punctuation]) => ({ - laClauses: [clause, ...moreClauses], - punctuation, - })); -} -/** Parses opening quotation mark */ -function openQuotationMark(): Parser { - return match(/(["“«「])\s*/).map(([_, mark]) => mark); -} -/** Parses closing quotation mark */ -function closeQuotationMark(): Parser { - return match(/(["”»」])\s*/).map(([_, mark]) => mark); -} -/** Parses multiple sentences inside quotation mark */ -function quotation(): Parser { - return sequence( - openQuotationMark(), - many(lazy(sentence)).filter(filter(SENTENCES_RULE)), - closeQuotationMark(), - ).map(([leftMark, sentences, rightMark]) => { - if (leftMark === '"' || leftMark === "“") { - if (rightMark !== '"' && rightMark !== "”") { - throw new UnrecognizedError("Mismatched quotation marks"); - } - } else if (leftMark === "«") { - if (rightMark !== "»") { - throw new UnrecognizedError("Mismatched quotation marks"); - } - } else if (leftMark === "「") { - if (rightMark !== "」") { - throw new UnrecognizedError("Mismatched quotation marks"); - } - } else throw new UnreachableError(); - return { sentences, leftMark, rightMark }; - }); -} -/** A multiple Toki Pona sentence parser. 
*/ -export function parser(src: string): Output> { - return match(/\s*/).with(allAtLeastOnce(sentence())).skip(eol()).filter( - filter(SENTENCES_RULE), - ).parser(src) - .map(({ value }) => value); -} diff --git a/src/settings.ts b/src/settings.ts new file mode 100644 index 0000000..31e0842 --- /dev/null +++ b/src/settings.ts @@ -0,0 +1,176 @@ +/** Module for translation settings stored as a global state */ + +/** */ +type RedundancySettings = "both" | "condensed" | "default only"; +type Settings = { + "use-telo-misikeke": boolean; + "randomize": boolean; + "number-settings": RedundancySettings; + "tense-settings": RedundancySettings; +}; +const LOCAL_STORAGE_AVAILABLE = (() => { + if (typeof localStorage === "undefined") { + return false; + } + // https://developer.mozilla.org/en-US/docs/Web/API/Web_Storage_API/Using_the_Web_Storage_API + try { + const x = "__storage_test__"; + localStorage.setItem(x, x); + localStorage.removeItem(x); + return true; + } catch (e) { + return ( + e instanceof DOMException && + // everything except Firefox + (e.code === 22 || + // Firefox + e.code === 1014 || + // test name field too, because code might not be present + // everything except Firefox + e.name === "QuotaExceededError" || + // Firefox + e.name === "NS_ERROR_DOM_QUOTA_REACHED") && + // acknowledge QuotaExceededError only if there's something already stored + localStorage && + localStorage.length > 0 + ); + } +})(); +type Option = { + default: T; + updater: Updater; +}; +type SettingsItem = Option & { + value: T; +}; +type Updater = { + parse: (value: string) => T | null; + stringify: (value: T) => string; + load: (input: HTMLInputElement | HTMLSelectElement) => T; + set: (input: HTMLInputElement | HTMLSelectElement, value: T) => void; +}; +class Setter { + private settings: { [S in keyof T]: SettingsItem }; + constructor(option: { [S in keyof T]: Option }) { + // deno-lint-ignore no-explicit-any + const settings: any = {}; + for (const name of Object.keys(option)) { + 
const item = option[name]; + settings[name] = { + value: item.default, + default: item.default, + updater: item.updater, + }; + } + this.settings = settings; + } + get(name: S): T[S] { + return this.settings[name].value as T[S]; + } + /** This function is for browser only. */ + loadFromLocalStorage(): void { + if (!LOCAL_STORAGE_AVAILABLE) { + return; + } + for (const name of Object.keys(this.settings)) { + const settings = this.settings[name]; + const src = localStorage.getItem(name); + if (src != null) { + settings.value = settings.updater.parse(src) ?? settings.default; + } else { + settings.value = settings.default; + } + settings.updater.set( + document.getElementById(name) as HTMLInputElement, + settings.value, + ); + } + } + /** This function is for browser only. */ + loadFromElements(): void { + for (const name of Object.keys(this.settings)) { + const settings = this.settings[name]; + settings.value = settings.updater.load( + document.getElementById(name) as HTMLInputElement | HTMLSelectElement, + ); + if (LOCAL_STORAGE_AVAILABLE) { + localStorage.setItem(name, settings.updater.stringify(settings.value)); + } + } + } + /** This function is for browser only. */ + resetElementsToCurrent(): void { + for (const name of Object.keys(this.settings)) { + const settings = this.settings[name]; + settings.updater.set( + document.getElementById(name) as HTMLInputElement | HTMLSelectElement, + settings.value, + ); + } + } + /** This function is for browser only. 
*/ + resetElementsToDefault(): void { + for (const name of Object.keys(this.settings)) { + const settings = this.settings[name]; + settings.updater.set( + document.getElementById(name) as HTMLInputElement | HTMLSelectElement, + settings.default, + ); + } + } +} +const boolUpdater: Updater = { + parse: (value) => { + switch (value) { + case "T": + return true; + case "F": + return false; + default: + return null; + } + }, + stringify: (value) => { + if (value) { + return "T"; + } else { + return "F"; + } + }, + load: (input) => (input as HTMLInputElement).checked, + set: (input, value) => { + (input as HTMLInputElement).checked = value; + }, +}; +const redundancyUpdater: Updater = { + parse: (value) => { + if (["both", "condensed", "default only"].includes(value)) { + return value as RedundancySettings; + } else { + return null; + } + }, + stringify: (value) => value, + load: (input) => input.value as RedundancySettings, + set: (input, value) => { + input.value = value; + }, +}; +export const settings = new Setter({ + "use-telo-misikeke": { + default: true, + updater: boolUpdater, + }, + "randomize": { + default: false, + updater: boolUpdater, + }, + "number-settings": { + default: "both", + updater: redundancyUpdater, + }, + "tense-settings": { + default: "both", + updater: redundancyUpdater, + }, +}); diff --git a/src/token.ts b/src/token.ts new file mode 100644 index 0000000..08f8a06 --- /dev/null +++ b/src/token.ts @@ -0,0 +1,71 @@ +/** Module describing token. */ + +/** Represents token. 
*/ +export type Token = + | { type: "word"; word: string } + | { + type: "combined glyphs"; + words: Array; + } + | { + type: "space long glyph"; + words: Array; + spaceLength: number; + } + | { + type: "headed long glyph start"; + words: Array; + } + | { + type: "headless long glyph end"; + } + | { + type: "headless long glyph start"; + } + | { + type: "headed long glyph end"; + words: Array; + } + | { + type: "inside long glyph"; + words: Array; + } + | { type: "multiple a"; count: number } + | { type: "long word"; word: string; length: number } + | { type: "x ala x"; word: string } + | { type: "proper word"; words: string; kind: "cartouche" | "latin" } + | { type: "punctuation"; punctuation: string }; +/** Describes a token. Useful for error messages. */ +export function describe(token: Token): string { + switch (token.type) { + case "word": + return `"${token.word}"`; + case "combined glyphs": + return `combined glyphs "${token.words.join(" ")}"`; + case "space long glyph": + case "headed long glyph start": + case "headless long glyph start": + return "long glyph"; + case "headless long glyph end": + case "headed long glyph end": + case "inside long glyph": + return "end of long glyph"; + case "multiple a": + return `"${new Array(token.count).fill("a").join(" ")}"`; + case "long word": + return `"${new Array(token.length).fill(token.word).join("")}"`; + case "x ala x": + return `"${token.word} ala ${token.word}"`; + case "proper word": + switch (token.kind) { + case "cartouche": + return "cartouche"; + case "latin": + return "proper word"; + } + // this is unreachable + // fallthrough + case "punctuation": + return "punctuation mark"; + } +} diff --git a/src/translator.ts b/src/translator.ts index 98e042d..3e2afb0 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,346 +1,1299 @@ -import { Clause } from "./ast.ts"; -import { - FullClause, - Modifier, - MultiplePhrases, - Phrase, - Sentence, - WordUnit, -} from "./ast.ts"; -import { Output } from 
"./output.ts"; -import { parser } from "./parser.ts"; +import { parse } from "./ast-parser.ts"; +import * as TokiPona from "./ast.ts"; +import * as English from "./english-ast.ts"; import { TodoError } from "./error.ts"; -import { DEFINITION } from "./definition.ts"; -import { OutputError } from "./error.ts"; -import { UnreachableError } from "./error.ts"; - -/** A special kind of Output that translators returns. */ -export type TranslationOutput = Output; +import { nullableAsArray, repeat, repeatWithSpace } from "./misc.ts"; +import { Output } from "./output.ts"; +import { settings } from "./settings.ts"; +import { DICTIONARY } from "dictionary/dictionary.ts"; +import * as Dictionary from "dictionary/type.ts"; -const WORD_TO_NUMBER: { [word: string]: number } = { - ale: 100, - ali: 100, - mute: 20, - luka: 5, - tu: 2, - wan: 1, -}; -// TODO: -like and -related suffixes for nouns as adjectives -// TODO: "and" in "of" and "in X way" +const CONJUNCTION = { "and conjunction": "and", "anu": "or" } as const; -/** - * Helper function for turning array or tuple of Output into Output of array or - * tuple. Make use of `as const` to infer array as tuple. 
- */ -// TODO: maybe there's a better name -function rotate>( - array: { [I in keyof T]: Output } & { length: T["length"] }, -): Output { - // We resorted to using `any` types here, make sure it works properly - return array.reduce( - // deno-lint-ignore no-explicit-any - (result: Output, output) => - result.flatMap((left) => output.map((right) => [...left, right])), - // deno-lint-ignore no-explicit-any - new Output([[]]), - ) as Output; +function condense(first: string, second: string): string { + if (first === second) { + return first; + } else if ( + second.length > first.length && second.slice(0, first.length) === first + ) { + return `${first}(${second.slice(first.length)})`; + } else { + return `${first}/${second}`; + } +} +function condenseVerb(present: string, past: string): string { + const [first, ...rest] = present.split(" "); + const second = past.split(" ")[0]; + return [condense(first, second), ...rest].join(" "); +} +function unemphasized(word: string): English.Word { + return { word, emphasis: false }; +} +function findNumber( + determiner: Array, +): null | Dictionary.Quantity { + const quantity = determiner.map((determiner) => determiner.number); + if (quantity.every((quantity) => quantity === "both")) { + return "both"; + } else if ( + quantity.every((quantity) => quantity !== "plural") && + quantity.some((quantity) => quantity === "singular") + ) { + return "singular"; + } else if ( + quantity.every((quantity) => quantity !== "singular") && + quantity.some((quantity) => quantity === "plural") + ) { + return "plural"; + } else { + return null; + } +} +function nounForms( + singular: undefined | null | string, + plural: undefined | null | string, + determinerNumber: Dictionary.Quantity, +): Output<{ noun: string; number: English.Quantity }> { + switch (determinerNumber) { + case "both": + switch (settings.get("number-settings")) { + case "both": + return new Output([ + ...nullableAsArray(singular) + .map((noun) => ({ noun, number: "singular" as 
const })), + ...nullableAsArray(plural) + .map((noun) => ({ noun, number: "plural" as const })), + ]); + case "condensed": + if (singular != null && plural != null) { + return new Output([{ + noun: condense(singular, plural), + number: "condensed", + }]); + } + // fallthrough + case "default only": + if (singular != null) { + return new Output([{ noun: singular, number: "singular" }]); + } else { + return new Output([{ noun: plural!, number: "plural" }]); + } + } + // unreachable + // fallthrough + case "singular": + return new Output(nullableAsArray(singular)) + .map((noun) => ({ noun, number: "singular" as const })); + case "plural": + return new Output(nullableAsArray(plural)) + .map((noun) => ({ noun, number: "plural" as const })); + } +} +function simpleNounForms( + singular: undefined | null | string, + plural: undefined | null | string, +): Output { + return nounForms(singular, plural, "both").map((noun) => noun.noun); } -function definition( - kind: "noun" | "adjective" | "adverb", - word: string, -): TranslationOutput { - return Output.concat( - new Output(new OutputError(`No ${kind} translation found for ${word}.`)), - new Output(DEFINITION[word][kind]), +function noun( + definition: Dictionary.Noun, + emphasis: boolean, + count: number, +): Output { + const engDeterminer = Output.combine( + ...definition.determiner + .map((definition) => determiner(definition, false, 1)), ); + const engAdjective = Output.combine( + ...definition.adjective + .map((definition) => adjective(definition, null, 1)), + ); + return Output.combine(engDeterminer, engAdjective) + .flatMap(([determiner, adjective]) => { + const number = findNumber(determiner); + if (number == null) { + return new Output(); + } + return nounForms(definition.singular, definition.plural, number) + .map((noun) => ({ + type: "simple", + determiner, + adjective, + noun: { word: repeatWithSpace(noun.noun, count), emphasis }, + number: noun.number, + postCompound: null, + postAdjective: 
definition.postAdjective, + preposition: [], + emphasis: false, + })); + }); } -function number(words: Array): number { - return words.reduce((number, word) => number + WORD_TO_NUMBER[word], 0); +function determiner( + definition: Dictionary.Determiner, + emphasis: boolean, + count: number, +): Output { + return simpleNounForms(definition.determiner, definition.plural) + .map((determiner) => ({ + kind: definition.kind, + determiner: { + word: repeatWithSpace(determiner, count), + emphasis, + }, + number: definition.number, + })); } -function wordUnitAs( - kind: "noun" | "adjective" | "adverb", - word: WordUnit, -): TranslationOutput { - if (word.type === "default") { - return definition(kind, word.word); - } else if (word.type === "numbers") { - return new Output([number(word.numbers).toString()]); - } else if (word.type === "reduplication") { - return definition(kind, word.word).map((noun) => - new Array(word.count).fill(noun).join(" ") - ); +function adjective( + definition: Dictionary.Adjective, + emphasis: null | TokiPona.Emphasis, + count: number, +): Output { + let so: null | string; + if (emphasis == null) { + so = null; } else { - return new Output(new UnreachableError()); + switch (emphasis.type) { + case "word": + so = "so"; + break; + case "long word": + so = `s${repeat("o", emphasis.length)}`; + break; + } } + return new Output([ + ...nullableAsArray(so!).map((so) => ({ emphasis: false, so })), + { emphasis: emphasis != null, so: null }, + ]) + .map(({ emphasis, so }) => ({ + type: "simple", + kind: definition.kind, + adverb: [...definition.adverb, ...nullableAsArray(so)].map(unemphasized), + adjective: { + word: repeatWithSpace(definition.adjective, count), + emphasis, + }, + emphasis: false, + })); +} +function compoundAdjective( + definition: Dictionary.Definition & { type: "compound adjective" }, + emphasis: null | TokiPona.Emphasis, +): Output { + return Output.combine( + ...definition.adjective + .map((definition) => adjective(definition, 
emphasis, 1)), + ) + .map((adjective) => ({ + type: "compound", + conjunction: "and", + adjective, + emphasis: false, + })); } -function modifierAs( - kind: "noun" | "adjective" | "adverb", - modifier: Modifier, -): TranslationOutput { - if (modifier.type === "default") { - return wordUnitAs(kind, modifier.word); - } else if (modifier.type === "nanpa" || modifier.type === "proper words") { - return new Output(); - } else if (modifier.type === "pi") { - if (kind === "adverb") { +type ModifierTranslation = + | { type: "noun"; noun: English.NounPhrase } + | { type: "noun preposition"; noun: English.NounPhrase; preposition: string } + | { type: "adjective"; adjective: English.AdjectivePhrase } + | { type: "determiner"; determiner: English.Determiner } + | { type: "adverb"; adverb: English.Word } + | { type: "name"; name: string } + | { type: "in position phrase"; noun: English.NounPhrase }; +function defaultModifier(word: TokiPona.WordUnit): Output { + const emphasis = word.emphasis != null; + switch (word.type) { + case "number": { + let number: English.Quantity; + if (word.number === 1) { + number = "singular"; + } else { + number = "plural"; + } + return new Output([{ + type: "determiner", + determiner: { + determiner: { word: `${word.number}`, emphasis }, + kind: "numeral", + number, + }, + } as ModifierTranslation]); + } + case "x ala x": return new Output(); + case "default": + case "reduplication": { + let count: number; + switch (word.type) { + case "default": + count = 1; + break; + case "reduplication": + count = word.count; + break; + } + return new Output(DICTIONARY[word.word]).flatMap((definition) => { + switch (definition.type) { + case "noun": + return noun(definition, emphasis, count) + .map((noun) => + ({ + type: "noun", + noun, + }) as ModifierTranslation + ); + case "noun preposition": + return noun(definition.noun, emphasis, count) + .map((noun) => + ({ + type: "noun preposition", + noun, + preposition: definition.preposition, + }) as 
ModifierTranslation + ); + case "personal pronoun": + return simpleNounForms( + definition.singular?.object, + definition.plural?.object, + ) + .map((pronoun) => + ({ + type: "noun", + noun: { + type: "simple", + determiner: [], + adjective: [], + noun: { + word: repeatWithSpace(pronoun, count), + emphasis, + }, + number: "both", + postCompound: null, + postAdjective: null, + preposition: [], + emphasis: false, + }, + }) as ModifierTranslation + ); + case "determiner": + return determiner(definition, word.emphasis != null, count) + .map((determiner) => + ({ + type: "determiner", + determiner, + }) as ModifierTranslation + ); + case "adjective": + return adjective(definition, word.emphasis, count) + .map((adjective) => + ({ + type: "adjective", + adjective, + }) as ModifierTranslation + ); + case "compound adjective": + if (word.type === "default") { + return compoundAdjective(definition, word.emphasis) + .map((adjective) => + ({ + type: "adjective", + adjective, + }) as ModifierTranslation + ); + } else { + return new Output(); + } + case "adverb": + return new Output([{ + type: "adverb", + adverb: { + word: repeatWithSpace(definition.adverb, count), + emphasis, + }, + } as ModifierTranslation]); + default: + return new Output(); + } + }); } - return phraseAs(kind, modifier.phrase, { named: false, suffix: false }); - } else { - return new Output( - new TodoError(`translating ${modifier.type} as adjective`), - ); } } -function modifierAsSuffix( - kind: "noun" | "adjective", - suffix: Modifier, -): TranslationOutput { - let construction: string; - if (kind === "noun") { - construction = "of X"; - } else { - construction = "in X way"; +function modifier(modifier: TokiPona.Modifier): Output { + switch (modifier.type) { + case "default": + return defaultModifier(modifier.word); + case "proper words": + return new Output([{ type: "name", name: modifier.words }]); + case "pi": + return phrase(modifier.phrase, "object") + .filter((modifier) => + modifier.type !== "noun" || 
modifier.noun.type !== "simple" || + modifier.noun.preposition.length === 0 + ) + .filter((modifier) => + modifier.type != "adjective" || modifier.inWayPhrase == null + ); + case "nanpa": + return phrase(modifier.phrase, "object").filterMap((phrase) => { + if ( + phrase.type === "noun" && + (phrase.noun as English.NounPhrase & { type: "simple" }) + .preposition.length === 0 + ) { + return { + type: "in position phrase", + noun: { + type: "simple", + determiner: [], + adjective: [], + noun: { + word: "position", + emphasis: modifier.nanpa.emphasis != null, + }, + number: "singular", + postCompound: phrase.noun, + postAdjective: null, + preposition: [], + emphasis: false, + }, + } as ModifierTranslation; + } else { + return null; + } + }); + case "quotation": + return new Output(new TodoError(`translation of ${modifier.type}`)); } - if (suffix.type === "default") { - return wordUnitAs(kind, suffix.word).map((translation) => - construction.replace("X", translation) +} +type MultipleModifierTranslation = + | { + type: "adjectival"; + nounPreposition: null | { noun: English.NounPhrase; preposition: string }; + determiner: Array; + adjective: Array; + name: null | string; + ofPhrase: null | English.NounPhrase; + inPositionPhrase: null | English.NounPhrase; + } + | { + type: "adverbial"; + adverb: Array; + inWayPhrase: null | English.NounPhrase; + }; +function multipleModifiers( + modifiers: Array, +): Output { + return Output + .combine(...modifiers.map(modifier)) + .flatMap((modifiers) => { + const noun = modifiers + .filter((modifier) => modifier.type === "noun") + .map((modifier) => modifier.noun); + const nounPreposition = modifiers + .filter((modifier) => modifier.type === "noun preposition"); + const determiner = modifiers + .filter((modifier) => modifier.type === "determiner") + .map((modifier) => modifier.determiner); + const adjective = modifiers + .filter((modifier) => modifier.type === "adjective") + .map((modifier) => modifier.adjective); + const adverb = 
modifiers + .filter((modifier) => modifier.type === "adverb") + .map((modifier) => modifier.adverb); + const name = modifiers + .filter((modifier) => modifier.type === "name") + .map((modifier) => modifier.name); + const inPositionPhrase = modifiers + .filter((modifier) => modifier.type === "in position phrase") + .map((modifier) => modifier.noun); + let adjectival: Output; + if ( + noun.length <= 1 && + nounPreposition.length <= 1 && + adverb.length === 0 && + name.length <= 1 && + inPositionPhrase.length <= 1 && + (noun.length === 0 || inPositionPhrase.length === 0) + ) { + adjectival = new Output([{ + type: "adjectival", + nounPreposition: nounPreposition[0] ?? null, + determiner, + adjective, + name: name[0] ?? null, + ofPhrase: noun[0] ?? null, + inPositionPhrase: inPositionPhrase[0] ?? null, + } as MultipleModifierTranslation]); + } else { + adjectival = new Output(); + } + let adverbial: Output; + if ( + noun.length === 0 && + nounPreposition.length === 0 && + determiner.length === 0 && + adjective.length <= 1 && + name.length === 0 && + inPositionPhrase.length === 0 + ) { + let inWayPhrase: null | English.NounPhrase; + if (adjective.length > 0) { + inWayPhrase = { + type: "simple", + determiner: [], + adjective, + noun: { word: "way", emphasis: false }, + number: "singular", + postCompound: null, + postAdjective: null, + preposition: [], + emphasis: false, + }; + } else { + inWayPhrase = null; + } + adverbial = new Output([{ + type: "adverbial", + adverb, + inWayPhrase, + } as MultipleModifierTranslation]); + } else { + adverbial = new Output(); + } + return Output.concat(adjectival, adverbial); + }); +} +function fixDeterminer( + determiner: Array, +): null | Array { + const negative = determiner + .filter((determiner) => determiner.kind === "negative"); + const first = determiner + .filter((determiner) => + ["article", "demonstrative", "possessive"].includes(determiner.kind) ); - } else if (suffix.type === "nanpa") { - return phraseAs(kind, suffix.phrase, 
{ - named: kind === "noun", - suffix: false, - }).map( - (translation) => `in position ${translation}`, + const distributive = determiner + .filter((determiner) => determiner.kind === "distributive"); + const interrogative = determiner + .filter((determiner) => determiner.kind === "interrogative"); + const numerical = determiner + .filter((determiner) => + determiner.kind === "numeral" || determiner.kind === "quantifier" ); - } else if (suffix.type === "pi") { - return phraseAs(kind, suffix.phrase, { - named: kind === "noun", - suffix: false, - }).map(( - translation, - ) => construction.replace("X", translation)); - } else if (suffix.type === "proper words") { - return new Output([`named ${suffix.words}`]); + if ( + negative.length > 1 || first.length > 1 || distributive.length > 1 || + interrogative.length > 1 || numerical.length > 1 || + negative.length > 0 && interrogative.length > 0 + ) { + return null; } else { - return new Output( - new TodoError(`translation of ${suffix.type} as noun`), - ); + return [ + ...negative, + ...first, + ...distributive, + ...interrogative, + ...numerical, + ]; } } -function defaultPhraseAs( - kind: "noun" | "adjective", - phrase: Phrase & { type: "default" }, - options?: { - named?: boolean; - suffix?: boolean; - }, -): TranslationOutput { - const named = options?.named ?? true; - const suffix = options?.suffix ?? 
true; - const name = ( - phrase.modifiers.filter( - (modifier) => modifier.type === "proper words", - )[0] as undefined | (Modifier & { type: "proper words" }) - )?.words; - if (name && !named) { - return new Output(); +function rankAdjective(kind: Dictionary.AdjectiveType): number { + return [ + "opinion", + "size", + "physical quality", + "age", + "color", + "origin", + "material", + "qualifier", + ] + .indexOf(kind); +} +function fixAdjective( + adjective: Array, +): Array { + return (adjective + .flatMap((adjective) => { + switch (adjective.type) { + case "simple": + return [adjective]; + case "compound": + return adjective.adjective; + } + }) as Array) + .sort((a, b) => rankAdjective(a.kind) - rankAdjective(b.kind)); +} +type WordUnitTranslation = + | { + type: "noun"; + determiner: Array; + adjective: Array; + singular: null | string; + plural: null | string; + postAdjective: null | { adjective: string; name: string }; } - let modifierKind: "adjective" | "adverb"; - if (kind === "noun") { - modifierKind = "adjective"; - } else if (kind === "adjective") { - modifierKind = "adverb"; + | { + type: "adjective"; + adjective: English.AdjectivePhrase; + }; +function wordUnit( + wordUnit: TokiPona.WordUnit, + place: "subject" | "object", +): Output { + switch (wordUnit.type) { + case "number": + return new Output([{ + type: "noun", + determiner: [], + adjective: [], + singular: `${wordUnit.number}`, + plural: null, + postAdjective: null, + } as WordUnitTranslation]); + case "x ala x": + return new Output(); + case "default": + case "reduplication": { + let count: number; + switch (wordUnit.type) { + case "default": + count = 1; + break; + case "reduplication": + count = wordUnit.count; + break; + } + return new Output(DICTIONARY[wordUnit.word]) + .flatMap((definition) => { + switch (definition.type) { + case "noun": { + const engDeterminer = Output + .combine(...definition.determiner + .map((definition) => determiner(definition, false, 1))); + const engAdjective = 
Output + .combine(...definition.adjective + .map((definition) => adjective(definition, null, 1))); + return Output.combine(engDeterminer, engAdjective) + .map(([determiner, adjective]) => + ({ + type: "noun", + determiner, + adjective, + singular: definition.singular, + plural: definition.plural, + postAdjective: definition.postAdjective, + }) as WordUnitTranslation + ); + } + case "personal pronoun": { + let singular: null | string; + let plural: null | string; + switch (place) { + case "subject": + singular = definition.singular?.subject ?? null; + plural = definition.plural?.subject ?? null; + break; + case "object": + singular = definition.singular?.object ?? null; + plural = definition.plural?.object ?? null; + break; + } + return new Output([{ + type: "noun", + determiner: [], + adjective: [], + singular, + plural, + postAdjective: null, + } as WordUnitTranslation]); + } + case "adjective": + return adjective(definition, wordUnit.emphasis, count) + .map((adjective) => + ({ type: "adjective", adjective }) as WordUnitTranslation + ); + case "compound adjective": + if (wordUnit.type === "default") { + return compoundAdjective(definition, wordUnit.emphasis) + .map((adjective) => + ({ type: "adjective", adjective }) as WordUnitTranslation + ); + } else { + return new Output(); + } + default: + return new Output(); + } + }); + } } - const headWord = wordUnitAs(kind, phrase.headWord); - const modifierNoName = phrase.modifiers.filter(( - modifier, - ) => modifier.type !== "proper words"); - const modifierTranslation: Array = modifierNoName.map( - (modifier) => modifierAs(modifierKind, modifier), - ); - const translations = rotate([headWord, rotate(modifierTranslation)] as const) - .map( - ([headWord, modifiers]) => - [...modifiers.slice().reverse(), headWord].join(" "), - ).map( - (translation) => { - if (name) { - return `${translation} named ${name}`; +} +type PhraseTranslation = + | { type: "noun"; noun: English.NounPhrase } + | { + type: "adjective"; + adjective: 
English.AdjectivePhrase; + inWayPhrase: null | English.NounPhrase; + }; +function defaultPhrase( + phrase: TokiPona.Phrase & { type: "default" }, + place: "subject" | "object", +): Output { + return Output.combine( + wordUnit(phrase.headWord, place), + multipleModifiers(phrase.modifiers), + ) + .flatMap(([headWord, modifier]) => { + if (headWord.type === "noun" && modifier.type === "adjectival") { + let count: number; + switch (phrase.headWord.type) { + case "number": + case "default": + count = 1; + break; + case "x ala x": + return new Output(); + case "reduplication": + count = phrase.headWord.count; + break; + } + const determiner = fixDeterminer([ + ...headWord.determiner, + ...modifier.determiner, + ]); + if (determiner == null) { + return new Output(); + } + const number = findNumber(determiner); + if (number == null) { + return new Output(); + } + const adjective = fixAdjective([ + ...modifier.adjective.reverse(), + ...headWord.adjective, + ]); + let postAdjective: null | { + adjective: string; + name: string; + }; + if (headWord.postAdjective != null && modifier.name != null) { + return new Output(); + } else if (headWord.postAdjective != null) { + postAdjective = headWord.postAdjective; + } else if (modifier.name != null) { + postAdjective = { adjective: "named", name: modifier.name }; } else { - return translation; + postAdjective = null; } - }, - ); - if (suffix) { - const extraTranslations: Array = [ - ...modifierNoName.keys(), - ].map( - (i) => { - const suffixTranslation = modifierAsSuffix(kind, modifierNoName[i]); - const modifierTranslation = [ - ...modifierNoName.slice(0, i), - ...modifierNoName.slice(i + 1), - ].map((modifier) => modifierAs(modifierKind, modifier)); - return rotate([headWord, rotate(modifierTranslation)] as const).map( - ([headWord, modifiers]) => - [...modifiers.slice().reverse(), headWord].join(" "), - ).map( - (translation) => { - if (name) { - return `${translation} named ${name}`; - } else { - return translation; + const 
preposition = [ + ...nullableAsArray(modifier.inPositionPhrase) + .map((object) => ({ + preposition: { word: "in", emphasis: false }, + object, + })), + ...nullableAsArray(modifier.ofPhrase) + .map((object) => ({ + preposition: { word: "of", emphasis: false }, + object, + })), + ]; + if ( + preposition.length > 1 || + (preposition.length > 0 && postAdjective != null) + ) { + return new Output(); + } + const headNoun = nounForms(headWord.singular, headWord.plural, number) + .map((noun) => ({ + type: "simple" as const, + determiner, + adjective, + noun: { + word: repeatWithSpace(noun.noun, count), + emphasis: phrase.headWord.emphasis != null, + }, + number, + postCompound: null, + postAdjective, + preposition, + emphasis: phrase.emphasis != null && + modifier.nounPreposition == null, + })); + let noun: Output; + if (modifier.nounPreposition == null) { + noun = headNoun; + } else if ( + modifier.ofPhrase != null && modifier.inPositionPhrase != null + ) { + noun = headNoun.map((noun) => ({ + ...modifier.nounPreposition!.noun as English.NounPhrase & { + type: "simple"; + }, + preposition: [{ + preposition: { + word: modifier.nounPreposition!.preposition, + emphasis: false, + }, + object: noun, + }], + emphasis: phrase.emphasis != null, + })); + } else { + noun = new Output(); + } + return noun + .map((noun) => ({ type: "noun", noun }) as PhraseTranslation); + } else if ( + headWord.type === "adjective" && modifier.type === "adverbial" + ) { + const adjective = headWord.adjective; + if (adjective.type === "simple") { + return new Output([{ + type: "adjective", + adjective: { + ...adjective, + adverb: [...modifier.adverb.reverse(), ...adjective.adverb], + emphasis: phrase.emphasis != null, + }, + inWayPhrase: modifier.inWayPhrase, + } as PhraseTranslation]); + } else if ( + adjective.type === "compound" && modifier.adverb.length === 0 + ) { + return new Output([{ + type: "adjective", + adjective, + inWayPhrase: modifier.inWayPhrase, + } as PhraseTranslation]); + } else { 
+ return new Output(); + } + } else { + return new Output(); + } + }); +} +function phrase( + phrase: TokiPona.Phrase, + place: "subject" | "object", +): Output { + switch (phrase.type) { + case "default": + return defaultPhrase(phrase, place); + case "preverb": + case "preposition": + return new Output(); + case "quotation": + return new Output(new TodoError(`translation of ${phrase.type}`)); + } +} +function multiplePhrases( + phrases: TokiPona.MultiplePhrases, + place: "subject" | "object", +): Output { + switch (phrases.type) { + case "single": + return phrase(phrases.phrase, place); + case "and conjunction": + case "anu": { + const conjunction = CONJUNCTION[phrases.type]; + return Output + .combine( + ...phrases.phrases.map((phrases) => multiplePhrases(phrases, place)), + ) + .filterMap((phrase) => { + if (phrase.every((phrase) => phrase.type === "noun")) { + const nouns = phrase + .map((noun) => noun.noun) + .flatMap((noun) => { + if ( + noun.type === "compound" && + noun.conjunction === conjunction + ) { + return noun.nouns; + } else { + return [noun]; + } + }); + let number: English.Quantity; + switch (conjunction) { + case "and": + number = "plural"; + break; + case "or": + number = nouns[nouns.length - 1].number; + break; } - }, - ).flatMap((left) => - suffixTranslation.map((right) => [left, right].join(" ")) + return { + type: "noun", + noun: { + type: "compound", + conjunction, + nouns, + preposition: [], + number, + }, + } as PhraseTranslation; + } else if ( + phrases.type === "anu" && + phrase.every((phrase) => + phrase.type === "adjective" && phrase.inWayPhrase == null + ) + ) { + return { + type: "adjective", + adjective: { + type: "compound", + conjunction, + adjective: phrase + .map((adjective) => + (adjective as PhraseTranslation & { type: "adjective" }) + .adjective + ) + .flatMap((adjective) => { + if ( + adjective.type === "compound" && + adjective.conjunction === conjunction + ) { + return adjective.adjective; + } else { + return [adjective]; 
+ } + }), + }, + } as PhraseTranslation; + } else { + return null; + } + }); + } + } +} +function clause(clause: TokiPona.Clause): Output { + switch (clause.type) { + case "phrases": + return multiplePhrases(clause.phrases, "object").map((phrase) => { + switch (phrase.type) { + case "noun": + return { + type: "subject phrase", + subject: phrase.noun, + } as English.Clause; + case "adjective": + return { + type: "implied it's", + verb: { + type: "linking adjective", + linkingVerb: { + word: "is", + emphasis: false, + }, + adjective: phrase.adjective, + preposition: nullableAsArray(phrase.inWayPhrase) + .map((object) => ({ + preposition: { word: "in", emphasis: false }, + object, + })), + }, + preposition: [], + } as English.Clause; + } + }); + case "o vocative": + return multiplePhrases(clause.phrases, "object").filterMap((phrase) => { + if (phrase.type === "noun") { + return { type: "vocative", call: "hey", addressee: phrase.noun }; + } else { + return null; + } + }); + case "prepositions": + case "li clause": + case "o clause": + case "quotation": + return new Output(new TodoError(`translation of ${clause.type}`)); + } +} +function filler(filler: TokiPona.Emphasis): Array { + switch (filler.type) { + case "word": + return DICTIONARY[filler.word] + .filter((definition) => definition.type === "filler") + .map((definition) => + `${definition.before}${definition.repeat}${definition.after}` ); - }, - ); - return Output.concat(translations, ...extraTranslations); - } else { - return translations; + case "long word": + return DICTIONARY[filler.word] + .filter((definition) => definition.type === "filler") + .map((definition) => + `${definition.before}${ + repeat(definition.repeat, filler.length) + }${definition.after}` + ); + case "multiple a": + return [repeat("ha", filler.count)]; } } -function phraseAs(kind: "noun" | "adjective", phrase: Phrase, options?: { - named?: boolean; - suffix?: boolean; -}): TranslationOutput { - if (phrase.type === "default") { - return 
defaultPhraseAs(kind, phrase, options); +function emphasisAsPunctuation( + emphasis: undefined | null | TokiPona.Emphasis, + interrogative: boolean, +): null | string { + let questionMark: string; + if (interrogative) { + questionMark = "?"; } else { - return new Output(new TodoError(`translation of ${phrase.type}`)); + questionMark = ""; } + let exclamationMark: string; + if (emphasis == null) { + return null; + } else { + switch (emphasis.type) { + case "word": + switch (emphasis.word as "a" | "n") { + case "a": + exclamationMark = "!"; + break; + case "n": + return null; + } + break; + case "long word": + switch (emphasis.word as "a" | "n") { + case "a": + exclamationMark = repeat("!", emphasis.length); + break; + case "n": + return null; + } + break; + case "multiple a": + return null; + } + } + return `${questionMark}${exclamationMark}`; } -function translateMultiplePhrases( - phrases: MultiplePhrases, - translator: (phrase: Phrase) => TranslationOutput, - level = 2, -): TranslationOutput { - if (phrases.type === "single") { - return translator(phrases.phrase); - } else if (phrases.type === "and conjunction" || phrases.type === "anu") { - let conjunction: string; - if (phrases.type === "and conjunction") { - conjunction = "and"; +function interjection(clause: TokiPona.Clause): Output { + let interjection: Output = new Output(); + if (clause.type === "phrases" && clause.phrases.type === "single") { + const phrase = clause.phrases.phrase; + if (phrase.type === "default" && phrase.modifiers.length === 0) { + const headWord = phrase.headWord; + if (headWord.type === "default" || headWord.type === "reduplication") { + interjection = new Output(DICTIONARY[headWord.word]) + .filterMap((definition) => { + if (definition.type === "interjection") { + switch (headWord.type) { + case "default": + return definition.interjection; + case "reduplication": + return new Array(headWord.count) + .fill(definition.interjection) + .join(" "); + } + } else { + return null; + } + }) + 
.map((interjection) => + ({ + type: "interjection", + interjection: { + word: interjection, + emphasis: headWord.emphasis != null, + }, + }) as English.Clause + ); + } + } + } + return interjection; +} +function anuSeme(seme: TokiPona.HeadedWordUnit): English.Clause { + let interjection: string; + switch (seme.type) { + case "default": + interjection = "right"; + break; + case "reduplication": + interjection = new Array(seme.count).fill("right").join(" "); + } + return { + type: "interjection", + interjection: { + word: interjection!, + emphasis: seme.emphasis != null, + }, + }; +} +function sentence( + sentence: TokiPona.Sentence, +): Output { + // This relies on sentence filter, if some of those filters were disabled, + // this function might break. + if (sentence.interrogative === "x ala x") { + throw new TodoError('translation of "x ala x"'); + } + if (sentence.finalClause.type === "filler") { + return new Output(filler(sentence.finalClause.emphasis)) + .map((interjection) => + ({ + clauses: [{ + type: "interjection", + interjection: { + word: interjection, + emphasis: false, + }, + }], + punctuation: sentence.punctuation, + }) as English.Sentence + ); + } else { + const startingParticle = ((sentence.laClauses[0] ?? 
sentence.finalClause) as + & TokiPona.FullClause + & { type: "default" }) + .startingParticle; + let startingFiller: Output; + if (startingParticle == null) { + startingFiller = new Output([null]); } else { - conjunction = "or"; + startingFiller = new Output(filler(startingParticle)) + .map((interjection) => ({ + type: "interjection", + interjection: { + word: interjection, + emphasis: false, + }, + })); } - const translations = rotate( - phrases.phrases.map((phrases) => - translateMultiplePhrases(phrases, translator, level - 1) - ), - ); - if (level === 2) { - return translations.map((phrases) => { - if (phrases.length === 2) { - return [phrases[0], conjunction, phrases[1]].join(" "); - } else { - const comma = phrases.slice(0, phrases.length - 1); - const last = phrases[phrases.length - 1]; - return [ - comma.map((translation) => [translation, ", "].join("")).join(""), - conjunction, - " ", - last, - ].join(""); - } - }); - } else if (level === 1) { - return translations.map((phrases) => - phrases.join([" ", conjunction, " "].join("")) + const laClauses = + (sentence.laClauses as Array) + .map(({ clause }) => clause); + const givenClauses = Output + .combine(...laClauses.map(clause)) + .map((clauses) => + clauses.map((clause) => + ({ + type: "dependent", + conjunction: { + word: "given", + emphasis: false, + }, + clause, + }) as English.Clause + ) + ); + const { + kinOrTaso, + clause: lastTpClause, + anuSeme: tpAnuSeme, + endingParticle, + } = sentence.finalClause; + if (kinOrTaso != null) { + return new Output( + new TodoError(`translation of "${kinOrTaso.word}" preclause`), ); + } + const lastEngClause = clause(lastTpClause); + let right: Array; + if (tpAnuSeme == null) { + right = []; } else { - throw new Error("unreachable"); + right = [anuSeme(tpAnuSeme)]; } - } else { - throw new Error("unreachable"); + let interjectionClause: Output; + if ( + sentence.laClauses.length === 0 && kinOrTaso == null && tpAnuSeme == null + ) { + interjectionClause = 
interjection(lastTpClause); + } else { + interjectionClause = new Output(); + } + const engClauses = Output.combine( + startingFiller, + givenClauses, + Output.concat(lastEngClause, interjectionClause), + ) + .map(([filler, givenClauses, lastClause]) => [ + ...nullableAsArray(filler), + ...givenClauses, + lastClause, + ...right, + ]); + let endingFiller: Output; + if (endingParticle == null) { + endingFiller = new Output([null]); + } else { + endingFiller = new Output(filler(endingParticle)) + .map((interjection) => ({ + type: "interjection", + interjection: { + word: interjection, + emphasis: false, + }, + })); + } + let punctuation: string; + if (sentence.interrogative) { + punctuation = "?"; + } else { + punctuation = sentence.punctuation; + } + return Output.concat( + Output.combine( + engClauses, + new Output( + nullableAsArray( + emphasisAsPunctuation( + endingParticle, + sentence.interrogative != null, + ), + ), + ), + ) + .map(([clauses, punctuation]) => ({ clauses, punctuation })), + Output.combine(engClauses, endingFiller) + .map(([clauses, filler]) => ({ + clauses: [...clauses, ...nullableAsArray(filler)], + punctuation, + })), + ); } } -/** Translates a clause. 
*/ -function translateClause(clause: Clause): TranslationOutput { - if (clause.type === "phrases") { - const hasEn = (phrases: MultiplePhrases): boolean => { - if (phrases.type === "single") { - return false; - } else if (phrases.type === "and conjunction") { - return true; - } else if (phrases.type === "anu") { - return phrases.phrases.some(hasEn); +function nounAsPlainString(definition: Dictionary.Noun): Output { + return simpleNounForms(definition.singular, definition.plural) + .map((noun) => + [ + ...definition.determiner.map((determiner) => determiner.determiner), + ...definition.adjective.map((adjective) => adjective.adjective), + noun, + ...nullableAsArray(definition.postAdjective) + .map((adjective) => `${adjective.adjective} ${adjective.name}`), + ].join(" ") + ); +} +function verbAsPlainString( + verb: { presentPlural: string; past: string }, +): Output { + switch (settings.get("tense-settings")) { + case "both": + return new Output([ + verb.past, + verb.presentPlural, + `will ${verb.presentPlural}`, + ]); + case "condensed": + return new Output([ + `(will) ${condenseVerb(verb.presentPlural, verb.past)}`, + ]); + case "default only": + return new Output([verb.presentPlural]); + } +} +function definitionAsPlainString( + definition: Dictionary.Definition, +): Output { + switch (definition.type) { + case "noun": + return nounAsPlainString(definition); + case "personal pronoun": + return new Output([ + ...nullableAsArray(definition.singular?.subject), + ...nullableAsArray(definition.singular?.object), + ...nullableAsArray(definition.plural?.subject), + ...nullableAsArray(definition.plural?.object), + ]); + case "adjective": + return new Output([ + `${definition.adverb.join(" ")} ${definition.adjective}`, + ]); + case "compound adjective": { + const { adjective } = definition; + if (adjective.length === 2) { + return new Output([ + adjective + .map((adjective) => adjective.adjective) + .join(" and "), + ]); } else { - throw new Error("unreachable"); + const 
lastIndex = adjective.length - 1; + const init = adjective.slice(0, lastIndex); + const last = adjective[lastIndex]; + return new Output([ + `${ + init.map((adjective) => adjective.adjective).join(", ") + }, and ${last.adjective}`, + ]); } - }; - const phrases = clause.phrases; - const translations = translateMultiplePhrases( - phrases, - (phrase) => phraseAs("noun", phrase), - ); - if (hasEn(phrases)) { - return translations; - } else { - return Output.concat( - translateMultiplePhrases( - phrases, - (phrase) => phraseAs("adjective", phrase), - ), - translations, + } + case "determiner": + return simpleNounForms(definition.determiner, definition.plural); + case "adverb": + return new Output([definition.adverb]); + case "interjection": + return new Output([definition.interjection]); + case "verb": { + const verbs = verbAsPlainString(definition); + const directObject = Output.combine( + ...nullableAsArray(definition.directObject) + .map(nounAsPlainString), + ); + const indirectObject = Output.combine( + ...definition.indirectObject + .map((object) => + nounAsPlainString(object.object) + .map((noun) => `${object.preposition} ${noun}`) + ), ); + return Output.combine(verbs, directObject, indirectObject) + .map(([verb, directObject, indirectObject]) => + [ + verb, + ...directObject, + ...indirectObject, + ].join(" ") + ); } - } else if (clause.type === "o vocative") { - return translateMultiplePhrases( - clause.phrases, - (phrase) => phraseAs("noun", phrase).map((phrase) => `hey ${phrase}`), - ); - } else { - return new Output(new TodoError(`translation for ${clause.type}`)); + case "filler": + return new Output([ + `${definition.before}${definition.repeat}${definition.after}`, + ]); + case "particle definition": + return new Output([definition.definition]); + case "noun preposition": + return nounAsPlainString(definition.noun) + .map((noun) => `${noun} ${definition.preposition}`); + case "numeral": + return new Output([`${definition.numeral}`]); + case "preposition": 
+ return new Output([definition.preposition]); + case "preverb as linking verb": + return new Output([definition.linkingVerb]); + case "preverb as finite verb": + return verbAsPlainString(definition); + case "preverb as modal verb": + return new Output([definition.verb]); } } -/** Translates a full clause. */ -function translateFullClause(fullClause: FullClause): TranslationOutput { - let but = ""; - const taso = fullClause.taso; - if (taso) { - if (taso.type === "default") { - but = "but "; - } else if (taso.type === "reduplication") { - but = new Array(taso.count).fill("but ").join(""); - } - } - let isntIt = ""; - const anuSeme = fullClause.anuSeme; - if (anuSeme) { - if (anuSeme.type === "default") { - isntIt = ", isn't it"; - } else if (anuSeme.type === "reduplication") { - // TODO: better translation - isntIt = new Array(anuSeme.count).fill(", isn't it").join(""); +function multipleSentences( + sentences: TokiPona.MultipleSentences, +): Output> { + switch (sentences.type) { + case "single word": { + const { word } = sentences; + return new Output(DICTIONARY[word]) + .flatMap(definitionAsPlainString) + .map((definition) => + ({ + clauses: [{ type: "free form", text: definition }], + punctuation: "", + }) as English.Sentence + ) + .map((definition) => [definition]); } + case "sentences": + return Output.combine(...sentences.sentences.map(sentence)); } - return translateClause(fullClause.clause).map((clause) => - [but, clause, isntIt].join("") - ); -} -/** Translates a single sentence. */ -function translateSentence(sentence: Sentence): TranslationOutput { - return rotate(sentence.laClauses.map(translateFullClause)).map((clauses) => { - const contexts = clauses.slice(0, clauses.length - 1); - const final = clauses[clauses.length - 1]; - return [ - ...contexts.map((context) => `given ${context}, `), - final, - sentence.punctuation, - ].join(""); - }); -} -/** Translates multiple sentences. 
*/ -function translateSentences(sentences: Array): TranslationOutput { - return rotate(sentences.map(translateSentence)).map((sentences) => - sentences.join(" ") - ); } -/** Full Toki Pona translator. */ -export function translate(src: string): TranslationOutput { - return parser(src).flatMap(translateSentences); +export function translate(src: string): Output> { + return parse(src).flatMap(multipleSentences); } diff --git a/src/ucsur.ts b/src/ucsur.ts new file mode 100644 index 0000000..928de00 --- /dev/null +++ b/src/ucsur.ts @@ -0,0 +1,161 @@ +/** Module for constants and other helper items for UCSUR. */ + +/** */ +export const START_OF_CARTOUCHE = "\u{F1990}"; +export const END_OF_CARTOUCHE = "\u{F1991}"; +export const COMBINING_CARTOUCHE_EXTENSION = "\u{F1992}"; +export const START_OF_LONG_PI = "\u{F1993}"; +export const COMBINING_LONG_PI_EXTENSION = "\u{F1994}"; +export const STACKING_JOINER = "\u{F1995}"; +export const SCALING_JOINER = "\u{F1996}"; +export const START_OF_LONG_GLYPH = "\u{F1997}"; +export const END_OF_LONG_GLYPH = "\u{F1998}"; +export const COMBINING_LONG_GLYPH_EXTENSION = "\u{F1999}"; +export const START_OF_REVERSE_LONG_GLYPH = "\u{F199A}"; +export const END_OF_REVERSE_LONG_GLYPH = "\u{F199B}"; +export const MIDDLE_DOT = "\u{F199C}"; +export const COLON = "\u{F199D}"; + +export const UCSUR_TO_LATIN: { [ucsur: string]: string } = { + "󱤀": "a", + "󱤁": "akesi", + "󱤂": "ala", + "󱤃": "alasa", + "󱤄": "ale", + "󱤅": "anpa", + "󱤆": "ante", + "󱤇": "anu", + "󱤈": "awen", + "󱤉": "e", + "󱤊": "en", + "󱤋": "esun", + "󱤌": "ijo", + "󱤍": "ike", + "󱤎": "ilo", + "󱤏": "insa", + "󱤐": "jaki", + "󱤑": "jan", + "󱤒": "jelo", + "󱤓": "jo", + "󱤔": "kala", + "󱤕": "kalama", + "󱤖": "kama", + "󱤗": "kasi", + "󱤘": "ken", + "󱤙": "kepeken", + "󱤚": "kili", + "󱤛": "kiwen", + "󱤜": "ko", + "󱤝": "kon", + "󱤞": "kule", + "󱤟": "kulupu", + "󱤠": "kute", + "󱤡": "la", + "󱤢": "lape", + "󱤣": "laso", + "󱤤": "lawa", + "󱤥": "len", + "󱤦": "lete", + "󱤧": "li", + "󱤨": "lili", + "󱤩": "linja", + "󱤪": 
"lipu", + "󱤫": "loje", + "󱤬": "lon", + "󱤭": "luka", + "󱤮": "lukin", + "󱤯": "lupa", + "󱤰": "ma", + "󱤱": "mama", + "󱤲": "mani", + "󱤳": "meli", + "󱤴": "mi", + "󱤵": "mije", + "󱤶": "moku", + "󱤷": "moli", + "󱤸": "monsi", + "󱤹": "mu", + "󱤺": "mun", + "󱤻": "musi", + "󱤼": "mute", + "󱤽": "nanpa", + "󱤾": "nasa", + "󱤿": "nasin", + "󱥀": "lupa", + "󱥁": "ni", + "󱥂": "nimi", + "󱥃": "noka", + "󱥄": "o", + "󱥅": "olin", + "󱥆": "ona", + "󱥇": "open", + "󱥈": "pakala", + "󱥉": "pali", + "󱥊": "palisa", + "󱥋": "pan", + "󱥌": "pana", + "󱥍": "pi", + "󱥎": "pilin", + "󱥏": "pimeja", + "󱥐": "pini", + "󱥑": "pipi", + "󱥒": "poka", + "󱥓": "poki", + "󱥔": "pona", + "󱥕": "pu", + "󱥖": "sama", + "󱥗": "seli", + "󱥘": "selo", + "󱥙": "seme", + "󱥚": "sewi", + "󱥛": "sijelo", + "󱥜": "sike", + "󱥝": "sin", + "󱥞": "sina", + "󱥟": "sinpin", + "󱥠": "sitelen", + "󱥡": "sona", + "󱥢": "soweli", + "󱥣": "suli", + "󱥤": "suno", + "󱥥": "supa", + "󱥦": "suwi", + "󱥧": "tan", + "󱥨": "taso", + "󱥩": "tawa", + "󱥪": "telo", + "󱥫": "tenpo", + "󱥬": "toki", + "󱥭": "tomo", + "󱥮": "tu", + "󱥯": "unpa", + "󱥰": "uta", + "󱥱": "utala", + "󱥲": "walo", + "󱥳": "wan", + "󱥴": "waso", + "󱥵": "wawa", + "󱥶": "weka", + "󱥷": "wile", + "󱥸": "namako", + "󱥹": "kin", + "󱥺": "oko", + "󱥻": "kipisi", + "󱥼": "leko", + "󱥽": "monsuta", + "󱥾": "tonsi", + "󱥿": "jasima", + "󱦀": "kijetesantakalu", + "󱦁": "soko", + "󱦂": "meso", + "󱦃": "epiku", + "󱦄": "kokosila", + "󱦅": "lanpan", + "󱦆": "n", + "󱦇": "misikeke", + "󱦈": "ku", + "󱦠": "pake", + "󱦡": "apeja", + "󱦢": "majuna", + "󱦣": "powe", +}; diff --git a/src/vocabulary.ts b/src/vocabulary.ts deleted file mode 100644 index 360d39c..0000000 --- a/src/vocabulary.ts +++ /dev/null @@ -1,155 +0,0 @@ -/** Particles. */ -export const PARTICLES = new Set([ - "a", - "ala", - "anu", - "e", - "en", - "la", - "li", - "nanpa", - "o", - "pi", - "taso", -]); -/** Content words. 
*/ -export const CONTENT_WORD = new Set([ - "akesi", - "ala", - "alasa", - "ale", - "ali", - "anpa", - "ante", - "awen", - "esun", - "ijo", - "ike", - "ilo", - "insa", - "jaki", - "jan", - "jelo", - "jo", - "kala", - "kalama", - "kama", - "kasi", - "ken", - "kepeken", - "kili", - "kiwen", - "ko", - "kon", - "kule", - "kulupu", - "kute", - "lape", - "laso", - "lawa", - "len", - "lete", - "lili", - "linja", - "lipu", - "loje", - "lon", - "luka", - "lukin", - "lupa", - "ma", - "mama", - "mani", - "meli", - "mi", - "mije", - "moku", - "moli", - "monsi", - "mu", - "mun", - "musi", - "mute", - "nanpa", - "nasa", - "nasin", - "nena", - "ni", - "nimi", - "noka", - "olin", - "ona", - "open", - "pakala", - "pali", - "palisa", - "pan", - "pana", - "pilin", - "pimeja", - "pini", - "pipi", - "poka", - "poki", - "pona", - "pu", - "sama", - "seli", - "selo", - "seme", - "sewi", - "sijelo", - "sike", - "sin", - "sina", - "sinpin", - "sitelen", - "sona", - "soweli", - "suli", - "suno", - "supa", - "suwi", - "tan", - "taso", - "tawa", - "telo", - "tenpo", - "toki", - "tomo", - "tonsi", - "tu", - "unpa", - "uta", - "utala", - "walo", - "wan", - "waso", - "wawa", - "weka", - "wile", -]); -/** Special subjects that doesn't use _li_ */ -export const SPECIAL_SUBJECT = new Set(["mi", "sina"]); -export const NUMBER = new Set(["wan", "tu", "luka", "mute", "ale", "ali"]); -export const PREVERB = new Set([ - "alasa", - "awen", - "kama", - "ken", - "lukin", - "open", - "pini", - "sona", - "wile", -]); -/** Prepositions. */ -export const PREPOSITION = new Set([ - "kepeken", - "lon", - "sama", - "tan", - "tawa", -]); -/** Full vocabulary. 
*/ -export const VOCABULARY = new Set([...PARTICLES, ...CONTENT_WORD]); diff --git a/style.css b/style.css deleted file mode 100644 index ccfc037..0000000 --- a/style.css +++ /dev/null @@ -1,41 +0,0 @@ -body { - margin: 10px; - font-family: sans-serif; -} -a { - color: #0057af; -} -a:visited { - color: #551a8b; -} -#input { - box-sizing: border-box; - resize: vertical; - width: 100%; -} -#error { - color: #b60000; -} -@media (min-width: 800px) { - body { - margin: 50px; - } -} -@media (prefers-color-scheme: dark) { - body { - background-color: black; - color: white; - } - a { - color: #3197ff; - } - a:visited { - color: #b47de7; - } - #error { - color: #ff5e5e; - } -} -summary { - cursor: pointer; -} diff --git a/telo-misikeke/build.ts b/telo-misikeke/build.ts new file mode 100644 index 0000000..8a726d5 --- /dev/null +++ b/telo-misikeke/build.ts @@ -0,0 +1,45 @@ +/** Build codes for telo misikeke source codes. */ + +/** */ +const SOURCE = [ + { + source: + "https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/-/raw/main/public/rules.js?ref_type=heads&inline=false", + destination: new URL("./rules.js", import.meta.url), + exportItems: ["build_rules", "getMessage"], + }, + { + source: + "https://gitlab.com/telo-misikeke/telo-misikeke.gitlab.io/-/raw/main/public/Parser.js?ref_type=heads&inline=false", + destination: new URL("./Parser.js", import.meta.url), + exportItems: ["ParserWithCallbacks"], + }, +]; +async function buildFile( + source: string, + destination: URL, + exportItems: Array, +): Promise { + // fetch source code + const response = await fetch(source); + if (!response.ok) { + throw new Error( + `unable to fetch ${source} (${response.status} ${response.statusText})`, + ); + } + let file = await response.text(); + + // add `export` + file = file + `;export{${exportItems.join(",")}};`; + + //write the code + await Deno.writeTextFile(destination, file); +} +export async function buildTeloMisikeke(): Promise { + await Promise.all( + SOURCE + .map((file) => 
+ buildFile(file.source, file.destination, file.exportItems) + ), + ); +} diff --git a/telo-misikeke/linku-data.js b/telo-misikeke/linku-data.js new file mode 100644 index 0000000..fe66f58 --- /dev/null +++ b/telo-misikeke/linku-data.js @@ -0,0 +1,259 @@ +/** Transformed Linku data required for telo misikeke. */ +export const DATA = [ + ["a", "core"], + ["aka", "obscure"], + ["akesi", "core"], + ["ako", "obscure"], + ["ala", "core"], + ["alasa", "core"], + ["ale", "core"], + ["alente", "obscure"], + ["ali", "uncommon"], + ["alu", "obscure"], + ["anpa", "core"], + ["ante", "core"], + ["anu", "core"], + ["apeja", "uncommon"], + ["awase", "obscure"], + ["awen", "core"], + ["e", "core"], + ["eki", "obscure"], + ["en", "core"], + ["enko", "obscure"], + ["epiku", "uncommon"], + ["esun", "core"], + ["ete", "obscure"], + ["ewe", "obscure"], + ["i", "obscure"], + ["ijo", "core"], + ["ike", "core"], + ["iki", "obscure"], + ["ilo", "core"], + ["insa", "core"], + ["ipi", "obscure"], + ["isipin", "rare"], + ["itomi", "obscure"], + ["jaki", "core"], + ["jaku", "obscure"], + ["jalan", "obscure"], + ["jami", "obscure"], + ["jan", "core"], + ["jans", "obscure"], + ["jasima", "uncommon"], + ["je", "obscure"], + ["jelo", "core"], + ["jo", "core"], + ["jonke", "obscure"], + ["ju", "obscure"], + ["jule", "obscure"], + ["jume", "obscure"], + ["kala", "core"], + ["kalama", "core"], + ["kalamARR", "obscure"], + ["kalijopilale", "obscure"], + ["kama", "core"], + ["kamalawala", "obscure"], + ["kan", "obscure"], + ["kapa", "obscure"], + ["kapesi", "rare"], + ["kasi", "core"], + ["ke", "obscure"], + ["ken", "core"], + ["kepeken", "core"], + ["kepen", "obscure"], + ["kese", "obscure"], + ["ki", "obscure"], + ["kijetesantakalu", "widespread"], + ["kiki", "uncommon"], + ["kili", "core"], + ["kin", "widespread"], + ["kipisi", "widespread"], + ["kisa", "obscure"], + ["kiwen", "core"], + ["ko", "core"], + ["kokosila", "uncommon"], + ["kon", "core"], + ["konsi", "obscure"], + ["konwe", "obscure"], + 
["kosan", "obscure"], + ["ku", "widespread"], + ["kule", "core"], + ["kulijo", "obscure"], + ["kulu", "obscure"], + ["kulupu", "core"], + ["kuntu", "obscure"], + ["kute", "core"], + ["kutopoma", "obscure"], + ["la", "core"], + ["lanpan", "common"], + ["lape", "core"], + ["laso", "core"], + ["lawa", "core"], + ["leko", "widespread"], + ["len", "core"], + ["lete", "core"], + ["li", "core"], + ["lijokuku", "obscure"], + ["likujo", "obscure"], + ["lili", "core"], + ["linja", "core"], + ["linluwi", "uncommon"], + ["lipu", "core"], + ["lo", "obscure"], + ["loje", "core"], + ["loka", "obscure"], + ["lokon", "obscure"], + ["lon", "core"], + ["lu", "obscure"], + ["luka", "core"], + ["lukin", "core"], + ["lupa", "core"], + ["ma", "core"], + ["majuna", "uncommon"], + ["mama", "core"], + ["mani", "core"], + ["meli", "widespread"], + ["melome", "obscure"], + ["meso", "common"], + ["mi", "core"], + ["mije", "widespread"], + ["mijomi", "obscure"], + ["misa", "rare"], + ["misikeke", "common"], + ["moku", "core"], + ["moli", "core"], + ["molusa", "obscure"], + ["monsi", "core"], + ["monsuta", "widespread"], + ["mu", "core"], + ["mulapisu", "obscure"], + ["mun", "core"], + ["musi", "core"], + ["mute", "core"], + ["n", "common"], + ["nalanja", "obscure"], + ["namako", "widespread"], + ["nanpa", "core"], + ["nasa", "core"], + ["nasin", "core"], + ["natu", "obscure"], + ["neja", "obscure"], + ["nele", "obscure"], + ["nena", "core"], + ["ni", "core"], + ["nimi", "core"], + ["nimisin", "rare"], + ["nja", "obscure"], + ["noka", "core"], + ["nu", "obscure"], + ["o", "core"], + ["ojuta", "obscure"], + ["oke", "rare"], + ["okepuma", "obscure"], + ["oki", "obscure"], + ["oko", "common"], + ["olin", "core"], + ["omekalike", "obscure"], + ["omekapo", "rare"], + ["omen", "obscure"], + ["ona", "core"], + ["oni", "obscure"], + ["open", "core"], + ["owe", "obscure"], + ["pa", "obscure"], + ["pakala", "core"], + ["pake", "rare"], + ["pali", "core"], + ["palisa", "core"], + ["pan", "core"], + 
["pana", "core"], + ["pasila", "obscure"], + ["pata", "obscure"], + ["peta", "obscure"], + ["peto", "obscure"], + ["pi", "core"], + ["pika", "obscure"], + ["pilin", "core"], + ["pimeja", "core"], + ["Pingo", "obscure"], + ["pini", "core"], + ["pipi", "core"], + ["pipo", "obscure"], + ["po", "obscure"], + ["poka", "core"], + ["poki", "core"], + ["polinpin", "obscure"], + ["pomotolo", "obscure"], + ["pona", "core"], + ["poni", "obscure"], + ["powe", "uncommon"], + ["pu", "core"], + ["puwa", "rare"], + ["sama", "core"], + ["samu", "obscure"], + ["san", "obscure"], + ["seli", "core"], + ["selo", "core"], + ["seme", "core"], + ["sewi", "core"], + ["sijelo", "core"], + ["sike", "core"], + ["sikomo", "obscure"], + ["sin", "core"], + ["sina", "core"], + ["sinpin", "core"], + ["sipi", "obscure"], + ["sitelen", "core"], + ["slape", "obscure"], + ["soko", "widespread"], + ["sona", "core"], + ["soto", "obscure"], + ["soweli", "core"], + ["su", "obscure"], + ["suke", "obscure"], + ["suli", "core"], + ["suno", "core"], + ["supa", "core"], + ["sutopatikuna", "obscure"], + ["suwi", "core"], + ["taki", "rare"], + ["tan", "core"], + ["taso", "core"], + ["tawa", "core"], + ["te", "rare"], + ["teje", "obscure"], + ["telo", "core"], + ["ten", "obscure"], + ["tenpo", "core"], + ["to", "rare"], + ["tokana", "obscure"], + ["toki", "core"], + ["toma", "obscure"], + ["tomo", "core"], + ["tonsi", "widespread"], + ["tu", "core"], + ["tuli", "obscure"], + ["u", "obscure"], + ["umesu", "obscure"], + ["unpa", "core"], + ["unu", "rare"], + ["usawi", "rare"], + ["uta", "core"], + ["utala", "core"], + ["wa", "rare"], + ["waleja", "obscure"], + ["walo", "core"], + ["wan", "core"], + ["waso", "core"], + ["wasoweli", "obscure"], + ["wawa", "core"], + ["wawajete", "obscure"], + ["we", "obscure"], + ["weka", "core"], + ["wekama", "obscure"], + ["wi", "obscure"], + ["wile", "core"], + ["wuwojiti", "rare"], + ["yupekosi", "rare"], + ["yutu", "obscure"], +]; diff --git a/telo-misikeke/telo-misikeke.js 
b/telo-misikeke/telo-misikeke.js new file mode 100644 index 0000000..af37146 --- /dev/null +++ b/telo-misikeke/telo-misikeke.js @@ -0,0 +1,23 @@ +/** Glue code for telo misikeke */ + +import { ParserWithCallbacks } from "./Parser.js"; +import { build_rules, getMessage } from "./rules.js"; +import { DATA } from "./linku-data.js"; + +const RULES = build_rules(DATA); + +/** Gets all telo misikeke error messages. */ +export function errors(text) { + return new ParserWithCallbacks(RULES, false) + .tokenize(text) + .filter((token) => RULES[token.ruleName].category === "error") + .map((token) => { + const src = token.text + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll("&", "&"); + const message = getMessage(token.ruleName, token.match) + .replace(/\n/g, "
        "); + return `"${src}" ${message}`; + }); +}