Skip to content

Commit

Permalink
Contribute Morphuntion from Apple as open source software.
Browse files Browse the repository at this point in the history
This contribution should resolve the following issues: #5, #6, #7, #11, #12, #13, #15, #17, #18, #19
This contribution is also related to the following issues without fully resolving the issues: 3, 4, 8, 10, 21, 23, 24, 25
This contribution also has an implementation that addresses these CLDR issues: 13025, 13563
  • Loading branch information
grhoten committed Nov 30, 2024
1 parent 372d4da commit e90364a
Show file tree
Hide file tree
Showing 836 changed files with 144,116 additions and 0 deletions.
27 changes: 27 additions & 0 deletions morphuntion/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#
# Copyright 2016-2024 Apple Inc. All rights reserved.
#
# Metadata directory
.DS_Store
.gradle
.idea
# Temporary build directory
build
dist
# Generated documentation
docs/headers
docs/html
docs/xml
docs/pages
# Vim
*.swp
# CMake ignores
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
CMakeBuild*
cmake-build-*
options.mk
# Gradle
gradle/wrapper/gradle-wrapper.jar
136 changes: 136 additions & 0 deletions morphuntion/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#
# Copyright 2018-2024 Apple Inc. All rights reserved.
#
# Minimum CMake version; must come before project() so policy defaults are set.
cmake_minimum_required(VERSION 3.24)
include(ExternalProject)
include(CheckCXXCompilerFlag)

# Helper modules and the .mk files live in the cmake/ directory next to this
# file. The paths are anchored to CMAKE_CURRENT_SOURCE_DIR (the directory of
# this CMakeLists.txt) rather than CMAKE_SOURCE_DIR, so that they still resolve
# when this project is pulled into a larger build with add_subdirectory().
# For a top-level build both variables hold the same value.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(VERSIONS_MK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/query_versions.mk)
set(OPTIONS_MK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/build_options.mk)
# Project helper commands (found through CMAKE_MODULE_PATH set above).
include(morphuntionMacros)

# Let IDE generators group targets into folders.
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
# Suppress the per-file "Installing:/Up-to-date:" messages during install.
set(CMAKE_INSTALL_MESSAGE NEVER)

# Declare the Morphuntion project with both the C and C++ toolchains enabled.
project(Morphuntion LANGUAGES C CXX)

# Query the processor count and echo it for debugging.
# NOTE(review): get_num_processors and morphuntion_debug_vars are not defined
# in this file; presumably they come from morphuntionMacros - confirm.
get_num_processors(NUM_PROCESSORS_VAL)
morphuntion_debug_vars(NUM_PROCESSORS_VAL)

# User-overridable cache entries.
set(NUM_PROCESSORS
    ${NUM_PROCESSORS_VAL}
    CACHE STRING "Number of cores to be used in make"
)
# A default standard-library flag is only provided when the compiler ID is
# exactly "Clang"; for any other compiler the variable stays unset unless the
# user supplies it.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    set(CXX_STD_LIB_FLAG
        -stdlib=libc++
        CACHE STRING "C++ Standard library to link against"
    )
endif()

# Morphuntion options
# PROFILING is a boolean switch (default OFF); it is read later in this file
# to add instrumentation flags.
option(PROFILING "Turn on code profiling" OFF)

# Apply the C++ standard library selection to every compile and link step in
# this directory and below. CXX_STD_LIB_FLAG only receives a default when the
# compiler ID is "Clang" (see the cache variables above); otherwise it expands
# to nothing unless the user set it.
add_compile_options(${CXX_STD_LIB_FLAG})
add_link_options(${CXX_STD_LIB_FLAG})

# Setting c++20 standard
# Require C++20 for all targets and disable compiler-specific extensions.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Equivalent to -fvisibility=hidden flag
set(CMAKE_CXX_VISIBILITY_PRESET hidden)

# Morphuntion version
# NOTE(review): TAG_PREFIX is not read anywhere in this file; presumably
# get_morphuntion_version() consumes it - confirm against morphuntionMacros.
set(TAG_PREFIX Morphuntion)
get_morphuntion_version(MORPHUNTION_VERSION_TAG)
# Expose the detected version as a cache entry so it can be overridden on the
# command line.
set(MORPHUNTION_VERSION ${MORPHUNTION_VERSION_TAG} CACHE STRING "Version of morphuntion to be used in publishing")
morphuntion_debug_vars(MORPHUNTION_VERSION)

# Pin the library install directory to "lib" before GNUInstallDirs is
# included, so the module keeps this value instead of choosing its own default.
set(CMAKE_INSTALL_LIBDIR lib)
include(GNUInstallDirs)

# Optional instrumentation, enabled with -DPROFILING=ON.
if(PROFILING)
    message("-- PROFILING TURNED ON")
    # The same set of flags is passed to both the compile and the link step.
    add_compile_options(
        -g
        -fprofile-instr-generate
        -fcoverage-mapping
    )
    add_link_options(
        -g
        -fprofile-instr-generate
        -fcoverage-mapping
    )
endif()

# Warning flags applied to every target in the project.
add_compile_options(
    -Wall
    -Weffc++
    -Wextra
)
# Extra diagnostics that are only requested when the compiler ID is exactly
# "Clang".
# NOTE(review): AppleClang reports a separate compiler ID, so it does not take
# this branch - confirm that is intended.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    add_compile_options(
        -Wglobal-constructors
        -Wexit-time-destructors
        -Wweak-vtables
        -Wvla-extension
    )
endif()

# Security hardening: format-string problems become errors, stack protection
# is enabled, and the libc++ hardening mode macro is set to "extensive".
add_compile_options(
    -Werror=format-security
    -fstack-protector-strong
)
add_compile_definitions(_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_EXTENSIVE)

# Release-only optimization and hardening (MinSizeRel and Release build types).
# The build-type test is the outer condition so that each platform branch is
# reachable on its own; previously the Linux branch sat inside an APPLE-only
# block and could never run.
# NOTE: CMAKE_BUILD_TYPE is only meaningful for single-config generators.
if("${CMAKE_BUILD_TYPE}" MATCHES "MinSizeRel" OR "${CMAKE_BUILD_TYPE}" MATCHES "Release")
    if(APPLE)
        # Add link time optimization for release build types for macOS
        add_compile_options(-flto)
        add_link_options(-flto)
    endif()
    if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
        # Improve code security
        add_compile_definitions(_FORTIFY_SOURCE=2)
    endif()
endif()

# Build-tree staging locations for the public headers and the generated data.
set(MORPHUNTION_INCLUDE_ROOT ${CMAKE_BINARY_DIR}/morphuntion_headers)
set(MORPHUNTION_DATA_ROOT_PREFIX ${CMAKE_BINARY_DIR}/morphuntion_data)
# The data root is the staging prefix followed by the install prefix and the
# data directory, i.e. it mirrors the installed layout inside the build tree.
set(MORPHUNTION_DATA_ROOT
    ${MORPHUNTION_DATA_ROOT_PREFIX}${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATADIR}
)

# Create both staging directories at configure time.
file(MAKE_DIRECTORY
    ${MORPHUNTION_DATA_ROOT}
    ${MORPHUNTION_INCLUDE_ROOT}
)

# ICU dependency handling lives in the dependICU module (looked up through
# CMAKE_MODULE_PATH).
include(dependICU)

# Globally visible imported interface target for libxml2: consumers link
# against the library by name and pick up its header directory. When
# CMAKE_OSX_SYSROOT is empty the include path is simply /usr/include/libxml2.
add_library(xml2 INTERFACE IMPORTED GLOBAL)
set_property(TARGET xml2 PROPERTY IMPORTED_LIBNAME xml2)
target_include_directories(xml2
    INTERFACE
        ${CMAKE_OSX_SYSROOT}/usr/include/libxml2
)

# Library search path used when running the unit tests ("make check"): the ICU
# library directory followed by the directory holding the built morphuntion
# library.
# NOTE(review): the consumer of this variable is not visible in this file;
# presumably the test subdirectory reads it - confirm.
set(DYLD_LIBRARY_PATH ${ICU_LIB_DIRECTORY}:$<TARGET_FILE_DIR:morphuntion>)
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
    # On Linux the CoreFoundation link directories are appended as well.
    string(APPEND DYLD_LIBRARY_PATH ":$<TARGET_PROPERTY:CoreFoundation,INTERFACE_LINK_DIRECTORIES>")
endif()

# Pull in the project's subdirectories. ext, tools and test are marked
# EXCLUDE_FROM_ALL, so their targets are only built when requested explicitly
# or when another target depends on them; resources and src are part of the
# default build.
add_subdirectory(ext EXCLUDE_FROM_ALL)
add_subdirectory(tools EXCLUDE_FROM_ALL)
add_subdirectory(resources)
add_subdirectory(src)
add_subdirectory(test EXCLUDE_FROM_ALL)

# make dist: runs the generated install script with DESTDIR pointing at
# <build>/dist, so the library, headers and data are staged there.
# The environment variable is set with "cmake -E env" instead of a
# "sh -c" string, so build paths containing spaces are passed intact and no
# POSIX shell is required.
add_custom_target(dist
    COMMAND ${CMAKE_COMMAND} -E env "DESTDIR=${CMAKE_CURRENT_BINARY_DIR}/dist"
            ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_install.cmake"
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    VERBATIM
)
# Everything that gets installed must be built before staging.
add_dependencies(dist morphuntion morphuntion-headers morphuntion-data)

# Documentation targets are only built on request.
add_subdirectory(docs EXCLUDE_FROM_ALL)

# make list-commands
# Prints a short help text describing the main build targets. Each line is
# printed with its own "cmake -E echo" so the output does not depend on
# whether the shell's echo interprets "\n" escape sequences.
add_custom_target(list-commands
    COMMAND ${CMAKE_COMMAND} -E echo "make list-commands : Shows this message"
    COMMAND ${CMAKE_COMMAND} -E echo ""
    COMMAND ${CMAKE_COMMAND} -E echo "make check : Runs unit tests"
    COMMAND ${CMAKE_COMMAND} -E echo "make check-headers : Tests whether all exported headers can be compiled independently."
    COMMAND ${CMAKE_COMMAND} -E echo "make morphuntion : Builds the shared library."
    COMMAND ${CMAKE_COMMAND} -E echo "make morphuntion-headers : Copy all morphuntion public headers to <build>/morphuntion_headers."
    COMMAND ${CMAKE_COMMAND} -E echo "make morphuntion-data : Generate all morphuntion data under <build>/morphuntion_data."
    COMMAND ${CMAKE_COMMAND} -E echo "make dist : Builds morphuntion, the headers, and the data."
    COMMAND ${CMAKE_COMMAND} -E echo "make coverage : Generates code coverage using sonar-scanner"
    COMMAND ${CMAKE_COMMAND} -E echo "make generate-coverage-csv : Generates code coverage as a csv"
    COMMAND ${CMAKE_COMMAND} -E echo ""
    VERBATIM
)
# end section

# Install rules, one component per deliverable: the shared library, the staged
# public headers, and the staged data. The trailing slash on each DIRECTORY
# path installs the directory's contents rather than the directory itself.
install(
    TARGETS morphuntion
    LIBRARY COMPONENT morphuntion_library
)
install(
    DIRECTORY ${MORPHUNTION_INCLUDE_ROOT}/
    TYPE INCLUDE
    COMPONENT morphuntion_headers
)
install(
    DIRECTORY ${MORPHUNTION_DATA_ROOT}/
    TYPE DATA
    COMPONENT morphuntion_data
)
101 changes: 101 additions & 0 deletions morphuntion/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<!--
Copyright 2016-2024 Apple Inc. All rights reserved.
-->
# Morphuntion

## About Morphuntion

Morphuntion is a C/C++ library that provides support for the following tasks.

- Inflection of a word into another [surface form](https://en.wikipedia.org/wiki/Surface_form) of that word.
- Grammatical agreement between words.
- Querying grammatical properties as [grammemes](https://en.wiktionary.org/wiki/grammeme) (the values of [grammatical categories](https://en.wikipedia.org/wiki/Grammatical_category))

It uses C++20, [ICU4C](https://icu.unicode.org/), UTF-16 strings (just like Java) and a data source of lexical
dictionaries that contain relationships between inflections of a word. Just like ICU, it is thread safe between service
objects, but mutable objects are not necessarily thread safe between threads.

By making this implementation open source, various software frameworks can generate grammatically correct messages,
which lowers the barriers to correctly localizing software.

### Platforms
Morphuntion is currently supported on these operating systems:

* iOS
* iPadOS
* macOS
* tvOS
* watchOS
* visionOS
* UBI Linux 9
* Ubuntu Linux 22

## How Morphuntion works
The following sections delve a bit deeper into the low-level functionality of Morphuntion, such as how caching and
multi-threading work with Morphuntion. These sections are meant as a guide to utilizing Morphuntion in a
safe manner while also getting as much out of the library as possible.

### Caching
At the time of writing, caching is a one-way street. Once an object that utilizes caching functionality has been
populated with some data, it remains in memory until the process has terminated. Reloading of such caches is not
supported, since that involves ensuring that all dependencies in the process space sharing the same resources have
also stopped and released the same resources.

The caching being done by Morphuntion lowers the lookup time for many portions of the
`morphuntion::dialog::CommonConceptFactory`
operations. It is for this reason that it may be a good idea to initialize these constructs before lookup time, so
that Morphuntion is in a "warmed up" state.

It is important to note that many of these cached data structures have ties to specific references in Morphuntion's
memory-mapped dictionaries. This makes reloading dictionaries difficult.

#### Grammar synthesizer caching
Grammar synthesizers memory map lexical dictionaries and cache various grammatical structures depending on the language.
Synthesized words are not cached.

### Multi-threading
Morphuntion is multi-thread friendly. It has <code>std::mutex</code> in places where deadlocks could occur, and
generally tries to abstract this away from users.

## History

This project was donated to the Unicode Consortium from Siri at Apple Inc. These additional
resources may be helpful background information to reference:

* [Automatic Grammar Agreement in Message Formatting](https://www.youtube.com/watch?v=C2e7hYIkqoM) ([2023.11.8](https://www.unicode.org/events/utw/2023/talks/grammar/))
* [Authoring Grammatically Correct Conversational Templates for Siri](https://www.youtube.com/watch?v=emlIWUTaJFM) ([2020.10.16](https://www.unicodeconference.org/iuc44/Conference_Program.pdf))
* [Let's Come To An Agreement About Our Words](https://www.youtube.com/watch?v=KclVxxHX26k) ([2017.02.16](https://www.imug.org/events/imug-2017-events.htm#words))

## Dependencies

The following are the dependencies to use this code.

| Library                                           | runtime | build time | test time | Note                     |
|---------------------------------------------------|:-------:|:----------:|:---------:|--------------------------|
| [CoreFoundation](https://www.swift.org/)          |    ✅    |     ✅      |     ✅     | automatically downloaded |
| [ICU4C](https://icu.unicode.org/)                 |    ✅    |     ✅      |     ✅     |                          |
| [marisa](https://github.com/s-yata/marisa-trie)   |    ✅    |     ✅      |     ✅     | statically linked        |
| [cmake](https://cmake.org/)                       |         |     ✅      |           |                          |
| [libxml2](https://gitlab.gnome.org/GNOME/libxml2) |         |     ✅      |     ✅     |                          |
| [Catch2](https://github.com/catchorg/Catch2/)     |         |            |     ✅     | automatically downloaded |

### Building

Before building this project, you must have a distribution of ICU4C available. The path to the ICU distribution must be
set as ICU_ROOT in either options.mk or as a command line argument to cmake. The path should be the same as the
--prefix value used when ICU was configured, built and installed.

If you want to build this project faster, you can adjust the number of concurrent build jobs used when compiling.

```
cd morphuntion
mkdir build
cd build
CC=clang CXX=clang++ cmake -DICU_ROOT=<PATH_TO_ICU> ..
make -j 8 check
```

Optionally, ICU_ROOT can be specified in the file options.mk with the following type of syntax.
```
ICU_ROOT=<PATH_TO_ICU>
```
Loading

0 comments on commit e90364a

Please sign in to comment.