new package: llama-cpp

Fix #17453
termux · Jul 20, 2023 · efaa0f6 · efaa0f6
1 parent 0c4986a
commit efaa0f6
Show file tree

Hide file tree

Showing 3 changed files with 138 additions and 0 deletions.
diff --git a/packages/llama-cpp/0001-fix-compile-options.patch b/packages/llama-cpp/0001-fix-compile-options.patch
@@ -0,0 +1,11 @@
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -434,7 +434,7 @@
+         endif()
+         if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
+             # Raspberry Pi 2
+-            add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
++            add_compile_options(-mno-unaligned-access -funsafe-math-optimizations)
+         endif()
+         if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
+             # Raspberry Pi 3, 4, Zero 2 (32-bit)
diff --git a/packages/llama-cpp/0002-impl-missing-functions.patch b/packages/llama-cpp/0002-impl-missing-functions.patch
@@ -0,0 +1,92 @@
+--- a/k_quants.c
++++ b/k_quants.c
+@@ -43,6 +43,89 @@
+ // 2-6 bit quantization in super-blocks
+ //
+
++#if defined(__ARM_NEON)
++
++#if !defined(__aarch64__)
++
++// Adds up the sixteen u8 lanes of v and returns the total as a uint16_t.
++inline static uint16_t vaddvq_u8(uint8x16_t v) {
++    uint8_t lanes[16];
++    uint16_t total = 0;
++    // Spill the vector to memory so the lanes can be walked with a plain loop.
++    vst1q_u8(lanes, v);
++    for (int i = 0; i < 16; ++i) {
++        total = (uint16_t)(total + lanes[i]);
++    }
++    return total;
++}
++
++// Adds up the sixteen s8 lanes of v and returns the total as an int16_t.
++inline static int16_t vaddvq_s8(int8x16_t v) {
++    // Each pairwise-long add sums adjacent lanes into lanes twice as wide,
++    // halving the lane count: 16 x s8 -> 8 x s16 -> 4 x s32 -> 2 x s64.
++    const int16x8_t sum8 = vpaddlq_s8(v);
++    const int32x4_t sum4 = vpaddlq_s16(sum8);
++    const int64x2_t sum2 = vpaddlq_s32(sum4);
++
++    // Fold the last two partial sums in scalar code.
++    return (int16_t)(vgetq_lane_s64(sum2, 0) + vgetq_lane_s64(sum2, 1));
++}
++
++// Sum of the eight s16 lanes of v, returned at 32-bit width.
++inline static int32_t vaddvq_s16(int16x8_t v) {
++    const int32x4_t pairs = vpaddlq_s16(v);     // adjacent lanes added, widened to s32
++    const int64x2_t quads = vpaddlq_s32(pairs); // adjacent pairs added, widened to s64
++
++    return (int32_t)(vgetq_lane_s64(quads, 0) + vgetq_lane_s64(quads, 1));
++}
++
++// Total of the eight u16 lanes of v, accumulated in 32 bits so it cannot wrap.
++inline static uint32_t vaddvq_u16(uint16x8_t v) {
++    const uint32x4_t pairs = vpaddlq_u16(v); // adjacent lanes added, widened to u32
++    const uint32x2_t halves = vadd_u32(vget_low_u32(pairs), vget_high_u32(pairs));
++
++    return vget_lane_u32(halves, 0) + vget_lane_u32(halves, 1);
++}
++
++inline static int32_t vaddvq_s32(int32x4_t v) { // returns the sum of the four s32 lanes of v
++    return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
++}
++
++inline static float vaddvq_f32(float32x4_t v) { // returns the sum of the four f32 lanes, added in lane order 0..3
++    return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
++}
++
++inline static float vminvq_f32(float32x4_t v) { // smallest of the four f32 lanes, picked with MIN
++    const float min01 = MIN(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1));
++    const float min23 = MIN(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3));
++    return MIN(min01, min23);
++}
++
++inline static float vmaxvq_f32(float32x4_t v) { // largest of the four f32 lanes, picked with MAX
++    const float max01 = MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1));
++    const float max23 = MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3));
++    return MAX(max01, max23);
++}
++
++// Converts each f32 lane of v to s32, rounding to nearest with ties to even.
++inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
++    // rintf breaks ties to even (assuming the FP rounding mode is left at its
++    // round-to-nearest default); roundf would round ties away from zero instead.
++    const int32_t lanes[4] = {
++        (int32_t)rintf(vgetq_lane_f32(v, 0)), (int32_t)rintf(vgetq_lane_f32(v, 1)),
++        (int32_t)rintf(vgetq_lane_f32(v, 2)), (int32_t)rintf(vgetq_lane_f32(v, 3)),
++    };
++    return vld1q_s32(lanes);
++}
++
++inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
++    // Lanes 0-3: adjacent-pair sums of a; lanes 4-7: adjacent-pair sums of b.
++    return vcombine_s16(vpadd_s16(vget_low_s16(a), vget_high_s16(a)),
++                        vpadd_s16(vget_low_s16(b), vget_high_s16(b)));
++}
++
++#endif // !defined(__aarch64__)
++#endif // defined(__ARM_NEON)
+
+ //
+ // ===================== Helper functions
diff --git a/packages/llama-cpp/build.sh b/packages/llama-cpp/build.sh
@@ -0,0 +1,35 @@
+TERMUX_PKG_HOMEPAGE=https://github.com/ggerganov/llama.cpp
+TERMUX_PKG_DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
+TERMUX_PKG_LICENSE=MIT
+TERMUX_PKG_MAINTAINER=@termux
+_COMMIT="fff0e0eafe817eef429ecb64f892ab7bdae31846"
+_COMMIT_POSISION=854 # must equal `git rev-list HEAD --count` at $_COMMIT; verified in termux_step_post_get_source
+TERMUX_PKG_VERSION=0.0.0-r$_COMMIT_POSISION-${_COMMIT:0:7}
+TERMUX_PKG_SRCURL=git+https://github.com/ggerganov/llama.cpp
+TERMUX_PKG_SHA256=95effaa75fdf1e7fb4819500f3aa6a9c970dbe36392a51a4ead904660841cd93
+TERMUX_PKG_GIT_BRANCH="master-${_COMMIT:0:7}"
+TERMUX_PKG_AUTO_UPDATE=true
+TERMUX_PKG_DEPENDS="openmpi, libopenblas"
+TERMUX_PKG_RECOMMENDS="python-numpy, python-sentencepiece"
+TERMUX_PKG_EXTRA_CONFIGURE_ARGS="
+-DLLAMA_MPI=ON
+-DBUILD_SHARED_LIBS=ON
+-DLLAMA_BLAS=ON
+-DLLAMA_BLAS_VENDOR=OpenBLAS
+"
+
+termux_step_post_get_source() {
+	git fetch --unshallow # fetch full history so the commit count below covers every commit
+	git checkout "$_COMMIT"
+	local _real_commit_posision # declared apart from the assignment so git's exit status is not masked by `local`
+	_real_commit_posision="$(git rev-list HEAD --count)"
+	if [ "$_real_commit_posision" != "$_COMMIT_POSISION" ]; then
+		termux_error_exit "Please update commit posision. Expected: $_COMMIT_POSISION, current: $_real_commit_posision."
+	fi
+}
+
+termux_step_post_make_install() {
+	cd "$TERMUX_PREFIX/bin" || exit 1 # stop if the bin directory cannot be entered
+	mv main llama # bin/main becomes bin/llama
+	mv server llama-server # bin/server becomes bin/llama-server
+}