diff --git a/packages/llama-cpp/0001-fix-compile-options.patch b/packages/llama-cpp/0001-fix-compile-options.patch new file mode 100644 index 000000000000000..760ab0e3b13e3d7 --- /dev/null +++ b/packages/llama-cpp/0001-fix-compile-options.patch @@ -0,0 +1,11 @@ +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -434,7 +434,7 @@ + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") + # Raspberry Pi 2 +- add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations) ++ add_compile_options(-mno-unaligned-access -funsafe-math-optimizations) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") + # Raspberry Pi 3, 4, Zero 2 (32-bit) diff --git a/packages/llama-cpp/0002-impl-missing-functions.patch b/packages/llama-cpp/0002-impl-missing-functions.patch new file mode 100644 index 000000000000000..46522f8be057de1 --- /dev/null +++ b/packages/llama-cpp/0002-impl-missing-functions.patch @@ -0,0 +1,92 @@ +--- a/k_quants.c ++++ b/k_quants.c +@@ -43,6 +43,89 @@ + // 2-6 bit quantization in super-blocks + // + ++#if defined(__ARM_NEON) ++ ++#if !defined(__aarch64__) ++ ++inline static uint16_t vaddvq_u8(uint8x16_t v) { ++ return ++ (uint16_t)vgetq_lane_u8(v, 0) + (uint16_t)vgetq_lane_u8(v, 1) + ++ (uint16_t)vgetq_lane_u8(v, 2) + (uint16_t)vgetq_lane_u8(v, 3) + ++ (uint16_t)vgetq_lane_u8(v, 4) + (uint16_t)vgetq_lane_u8(v, 5) + ++ (uint16_t)vgetq_lane_u8(v, 6) + (uint16_t)vgetq_lane_u8(v, 7) + ++ (uint16_t)vgetq_lane_u8(v, 8) + (uint16_t)vgetq_lane_u8(v, 9) + ++ (uint16_t)vgetq_lane_u8(v, 10) + (uint16_t)vgetq_lane_u8(v, 11) + ++ (uint16_t)vgetq_lane_u8(v, 12) + (uint16_t)vgetq_lane_u8(v, 13) + ++ (uint16_t)vgetq_lane_u8(v, 14) + (uint16_t)vgetq_lane_u8(v, 15); ++} ++ ++inline static int16_t vaddvq_s8(int8x16_t v) { ++ return ++ (int16_t)vgetq_lane_s8(v, 0) + (int16_t)vgetq_lane_s8(v, 1) + ++ (int16_t)vgetq_lane_s8(v, 2) + (int16_t)vgetq_lane_s8(v, 3) + ++ (int16_t)vgetq_lane_s8(v, 4) + (int16_t)vgetq_lane_s8(v, 5) + ++ 
(int16_t)vgetq_lane_s8(v, 6) + (int16_t)vgetq_lane_s8(v, 7) + ++ (int16_t)vgetq_lane_s8(v, 8) + (int16_t)vgetq_lane_s8(v, 9) + ++ (int16_t)vgetq_lane_s8(v, 10) + (int16_t)vgetq_lane_s8(v, 11) + ++ (int16_t)vgetq_lane_s8(v, 12) + (int16_t)vgetq_lane_s8(v, 13) + ++ (int16_t)vgetq_lane_s8(v, 14) + (int16_t)vgetq_lane_s8(v, 15); ++} ++ ++inline static int32_t vaddvq_s16(int16x8_t v) { ++ return ++ (int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) + ++ (int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) + ++ (int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) + ++ (int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7); ++} ++ ++inline static uint32_t vaddvq_u16(uint16x8_t v) { ++ return ++ (uint32_t)vgetq_lane_u16(v, 0) + (uint32_t)vgetq_lane_u16(v, 1) + ++ (uint32_t)vgetq_lane_u16(v, 2) + (uint32_t)vgetq_lane_u16(v, 3) + ++ (uint32_t)vgetq_lane_u16(v, 4) + (uint32_t)vgetq_lane_u16(v, 5) + ++ (uint32_t)vgetq_lane_u16(v, 6) + (uint32_t)vgetq_lane_u16(v, 7); ++} ++ ++inline static int32_t vaddvq_s32(int32x4_t v) { ++ return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3); ++} ++ ++inline static float vaddvq_f32(float32x4_t v) { ++ return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3); ++} ++ ++inline static float vminvq_f32(float32x4_t v) { ++ return ++ MIN(MIN(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)), ++ MIN(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3))); ++} ++ ++inline static float vmaxvq_f32(float32x4_t v) { ++ return ++ MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)), ++ MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3))); ++} ++ ++inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) { ++ int32x4_t res; ++ ++ res[0] = roundf(vgetq_lane_f32(v, 0)); ++ res[1] = roundf(vgetq_lane_f32(v, 1)); ++ res[2] = roundf(vgetq_lane_f32(v, 2)); ++ res[3] = roundf(vgetq_lane_f32(v, 3)); ++ ++ return res; ++} ++ ++inline static int16x8_t vpaddq_s16(int16x8_t 
a, int16x8_t b) { ++ const int16x4_t c = vpadd_s16(vget_low_s16(a), vget_high_s16(a)); ++ const int16x4_t d = vpadd_s16(vget_low_s16(b), vget_high_s16(b)); ++ return vcombine_s16(c, d); ++} ++ ++#endif ++#endif + + // + // ===================== Helper functions diff --git a/packages/llama-cpp/build.sh b/packages/llama-cpp/build.sh new file mode 100644 index 000000000000000..13f3ade857d5d91 --- /dev/null +++ b/packages/llama-cpp/build.sh @@ -0,0 +1,35 @@ +TERMUX_PKG_HOMEPAGE=https://github.com/ggerganov/llama.cpp +TERMUX_PKG_DESCRIPTION="Port of Facebook's LLaMA model in C/C++" +TERMUX_PKG_LICENSE=MIT +TERMUX_PKG_MAINTAINER=@termux +_COMMIT="fff0e0eafe817eef429ecb64f892ab7bdae31846" +_COMMIT_POSITION=854 +TERMUX_PKG_VERSION=0.0.0-r$_COMMIT_POSITION-${_COMMIT:0:7} +TERMUX_PKG_SRCURL=git+https://github.com/ggerganov/llama.cpp +TERMUX_PKG_SHA256=95effaa75fdf1e7fb4819500f3aa6a9c970dbe36392a51a4ead904660841cd93 +TERMUX_PKG_GIT_BRANCH="master-${_COMMIT:0:7}" +TERMUX_PKG_AUTO_UPDATE=true +TERMUX_PKG_DEPENDS="openmpi, libopenblas" +TERMUX_PKG_RECOMMENDS="python-numpy, python-sentencepiece" +TERMUX_PKG_EXTRA_CONFIGURE_ARGS=" +-DLLAMA_MPI=ON +-DBUILD_SHARED_LIBS=ON +-DLLAMA_BLAS=ON +-DLLAMA_BLAS_VENDOR=OpenBLAS +" + +termux_step_post_get_source() { + git fetch --unshallow + git checkout $_COMMIT + + local _real_commit_position="$(git rev-list HEAD --count)" + if [ "$_real_commit_position" != "$_COMMIT_POSITION" ]; then + termux_error_exit "Please update commit position. Expected: $_COMMIT_POSITION, current: $_real_commit_position." + fi +} + +termux_step_post_make_install() { + cd "$TERMUX_PREFIX/bin" || exit 1 + mv main llama + mv server llama-server +}