Skip to content

Commit

Permalink
new package: llama-cpp
Browse files Browse the repository at this point in the history
Fix #17453
  • Loading branch information
Freed-Wu authored and licy183 committed Jul 20, 2023
1 parent 0c4986a commit efaa0f6
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 0 deletions.
11 changes: 11 additions & 0 deletions packages/llama-cpp/0001-fix-compile-options.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -434,7 +434,7 @@
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
# Raspberry Pi 2
- add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
+ add_compile_options(-mno-unaligned-access -funsafe-math-optimizations)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
# Raspberry Pi 3, 4, Zero 2 (32-bit)
92 changes: 92 additions & 0 deletions packages/llama-cpp/0002-impl-missing-functions.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
--- a/k_quants.c
+++ b/k_quants.c
@@ -43,6 +43,89 @@
// 2-6 bit quantization in super-blocks
//

+#if defined(__ARM_NEON)
+
+#if !defined(__aarch64__)
+
+inline static uint16_t vaddvq_u8(uint8x16_t v) {
+ return
+ (uint16_t)vgetq_lane_u8(v, 0) + (uint16_t)vgetq_lane_u8(v, 1) +
+ (uint16_t)vgetq_lane_u8(v, 2) + (uint16_t)vgetq_lane_u8(v, 3) +
+ (uint16_t)vgetq_lane_u8(v, 4) + (uint16_t)vgetq_lane_u8(v, 5) +
+ (uint16_t)vgetq_lane_u8(v, 6) + (uint16_t)vgetq_lane_u8(v, 7) +
+ (uint16_t)vgetq_lane_u8(v, 8) + (uint16_t)vgetq_lane_u8(v, 9) +
+ (uint16_t)vgetq_lane_u8(v, 10) + (uint16_t)vgetq_lane_u8(v, 11) +
+ (uint16_t)vgetq_lane_u8(v, 12) + (uint16_t)vgetq_lane_u8(v, 13) +
+ (uint16_t)vgetq_lane_u8(v, 14) + (uint16_t)vgetq_lane_u8(v, 15);
+}
+
+inline static int16_t vaddvq_s8(int8x16_t v) {
+ return
+ (int16_t)vgetq_lane_s8(v, 0) + (int16_t)vgetq_lane_s8(v, 1) +
+ (int16_t)vgetq_lane_s8(v, 2) + (int16_t)vgetq_lane_s8(v, 3) +
+ (int16_t)vgetq_lane_s8(v, 4) + (int16_t)vgetq_lane_s8(v, 5) +
+ (int16_t)vgetq_lane_s8(v, 6) + (int16_t)vgetq_lane_s8(v, 7) +
+ (int16_t)vgetq_lane_s8(v, 8) + (int16_t)vgetq_lane_s8(v, 9) +
+ (int16_t)vgetq_lane_s8(v, 10) + (int16_t)vgetq_lane_s8(v, 11) +
+ (int16_t)vgetq_lane_s8(v, 12) + (int16_t)vgetq_lane_s8(v, 13) +
+ (int16_t)vgetq_lane_s8(v, 14) + (int16_t)vgetq_lane_s8(v, 15);
+}
+
+inline static int32_t vaddvq_s16(int16x8_t v) {
+ return
+ (int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
+ (int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
+ (int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
+ (int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
+}
+
+inline static uint32_t vaddvq_u16(uint16x8_t v) {
+ return
+ (uint32_t)vgetq_lane_u16(v, 0) + (uint32_t)vgetq_lane_u16(v, 1) +
+ (uint32_t)vgetq_lane_u16(v, 2) + (uint32_t)vgetq_lane_u16(v, 3) +
+ (uint32_t)vgetq_lane_u16(v, 4) + (uint32_t)vgetq_lane_u16(v, 5) +
+ (uint32_t)vgetq_lane_u16(v, 6) + (uint32_t)vgetq_lane_u16(v, 7);
+}
+
+inline static int32_t vaddvq_s32(int32x4_t v) {
+ return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
+}
+
+inline static float vaddvq_f32(float32x4_t v) {
+ return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
+}
+
+inline static float vminvq_f32(float32x4_t v) {
+ return
+ MIN(MIN(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
+ MIN(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
+}
+
+inline static float vmaxvq_f32(float32x4_t v) {
+ return
+ MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
+ MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
+}
+
+inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
+ int32x4_t res;
+
+ res[0] = roundf(vgetq_lane_f32(v, 0));
+ res[1] = roundf(vgetq_lane_f32(v, 1));
+ res[2] = roundf(vgetq_lane_f32(v, 2));
+ res[3] = roundf(vgetq_lane_f32(v, 3));
+
+ return res;
+}
+
+inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
+ const int16x4_t c = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
+ const int16x4_t d = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
+ return vcombine_s16(c, d);
+}
+
+#endif
+#endif

//
// ===================== Helper functions
35 changes: 35 additions & 0 deletions packages/llama-cpp/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Termux build recipe metadata for llama.cpp (upstream: ggerganov/llama.cpp).
TERMUX_PKG_HOMEPAGE=https://github.com/ggerganov/llama.cpp
TERMUX_PKG_DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
TERMUX_PKG_LICENSE=GPL-3.0
TERMUX_PKG_MAINTAINER=@termux
# Pinned upstream commit; upstream has no release tags, so the version is
# synthesized from the commit count ("position") plus a short hash.
# NOTE(review): "POSISION" is a typo for "POSITION" — kept as-is because the
# name is referenced elsewhere in this file; fix in a coordinated rename.
_COMMIT="fff0e0eafe817eef429ecb64f892ab7bdae31846"
_COMMIT_POSISION=854
TERMUX_PKG_VERSION=0.0.0-r$_COMMIT_POSISION-${_COMMIT:0:7}
TERMUX_PKG_SRCURL=git+https://github.com/ggerganov/llama.cpp
TERMUX_PKG_SHA256=95effaa75fdf1e7fb4819500f3aa6a9c970dbe36392a51a4ead904660841cd93
TERMUX_PKG_GIT_BRANCH="master-${_COMMIT:0:7}"
TERMUX_PKG_AUTO_UPDATE=true
# Runtime deps: MPI support (-DLLAMA_MPI) and BLAS backend (-DLLAMA_BLAS).
TERMUX_PKG_DEPENDS="openmpi, libopenblas"
# Optional helpers used by upstream's convert/tokenizer Python scripts.
TERMUX_PKG_RECOMMENDS="python-numpy, python-sentencepiece"
TERMUX_PKG_EXTRA_CONFIGURE_ARGS="
-DLLAMA_MPI=ON
-DBUILD_SHARED_LIBS=ON
-DLLAMA_BLAS=ON
-DLLAMA_BLAS_VENDOR=OpenBLAS
"

# Check out the pinned commit and verify that _COMMIT_POSISION (the commit
# count recorded in the recipe metadata) still matches the repository, so a
# stale pin is caught at build time rather than producing a wrong version.
termux_step_post_get_source() {
	# The source is fetched as a shallow clone; unshallow so the pinned
	# commit is reachable and `git rev-list --count` is accurate.
	git fetch --unshallow
	git checkout "$_COMMIT"

	local _real_commit_position="$(git rev-list HEAD --count)"
	if [ "$_real_commit_position" != "$_COMMIT_POSISION" ]; then
		# Fixed typo in user-facing message: "posision" -> "position".
		termux_error_exit "Please update commit position. Expected: $_COMMIT_POSISION, current: $_real_commit_position."
	fi
}

# Rename upstream's generically-named binaries ("main", "server") to
# namespaced names so they do not collide with other packages in $PREFIX/bin.
termux_step_post_make_install() {
	cd "$TERMUX_PREFIX/bin" || exit 1
	local _pair
	for _pair in "main:llama" "server:llama-server"; do
		mv "${_pair%%:*}" "${_pair#*:}"
	done
}

0 comments on commit efaa0f6

Please sign in to comment.