Skip to content

Commit

Permalink
Update to 1.52.2 with 'upstream/concedo'
Browse files Browse the repository at this point in the history
  • Loading branch information
YellowRoseCx committed Dec 18, 2023
2 parents eee005e + ec05230 commit 509ad00
Show file tree
Hide file tree
Showing 21 changed files with 611 additions and 275 deletions.
18 changes: 10 additions & 8 deletions common/train.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void free_random_uniform_distribution(struct random_uniform_distribution * rnd)

struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) {
float scale = 1.0f; // xavier
switch (tensor->n_dims) {
switch (ggml_n_dims(tensor)) {
case 1:
scale /= sqrtf((float) tensor->ne[0]);
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
Expand Down Expand Up @@ -119,7 +119,7 @@ struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct
}

struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd) {
switch (tensor->n_dims) {
switch (ggml_n_dims(tensor)) {
case 1:
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0]);
Expand Down Expand Up @@ -183,25 +183,27 @@ float fclamp(const float v, const float min, const float max) {
}

void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
GGML_ASSERT(tensor->n_dims == 1);
GGML_ASSERT(tensor->ne[0] == ne0);
GGML_ASSERT(tensor->ne[1] == 1);
GGML_ASSERT(tensor->ne[2] == 1);
GGML_ASSERT(tensor->ne[3] == 1);
}

void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
GGML_ASSERT(tensor->n_dims == 2);
GGML_ASSERT(tensor->ne[0] == ne0);
GGML_ASSERT(tensor->ne[1] == ne1);
GGML_ASSERT(tensor->ne[2] == 1);
GGML_ASSERT(tensor->ne[3] == 1);
}

void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
GGML_ASSERT(tensor->n_dims == 3);
GGML_ASSERT(tensor->ne[0] == ne0);
GGML_ASSERT(tensor->ne[1] == ne1);
GGML_ASSERT(tensor->ne[2] == ne2);
GGML_ASSERT(tensor->ne[3] == 1);
}

void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
GGML_ASSERT(tensor->n_dims == 4);
GGML_ASSERT(tensor->ne[0] == ne0);
GGML_ASSERT(tensor->ne[1] == ne1);
GGML_ASSERT(tensor->ne[2] == ne2);
Expand All @@ -225,8 +227,8 @@ int64_t get_example_targets_batch(
bool sample_random_offsets
) {
GGML_ASSERT(samples_count > 0);
GGML_ASSERT(tokens_input->n_dims == 2);
GGML_ASSERT(target_probs->n_dims == 3);
GGML_ASSERT(ggml_is_matrix(tokens_input));
GGML_ASSERT(ggml_is_3d(target_probs));
int64_t n_vocab = target_probs->ne[0];
int64_t n_tokens = tokens_input->ne[0];
int64_t n_batch = tokens_input->ne[1];
Expand Down
84 changes: 44 additions & 40 deletions convert-lora-to-ggml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,20 @@

import json
import os
import re
import struct
import sys
from typing import Any, BinaryIO, Sequence

import numpy as np
import torch

NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}

from pathlib import Path
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
import gguf

HF_SUBLAYER_TO_GGML = {
"self_attn.q_proj": "attn_q",
"self_attn.k_proj": "attn_k",
"self_attn.v_proj": "attn_v",
"self_attn.o_proj": "attn_output",
"mlp.gate_proj": "ffn_gate",
"mlp.down_proj": "ffn_down",
"mlp.up_proj": "ffn_up",
"input_layernorm": "attn_norm",
"post_attention_layernorm": "ffn_norm",
}


def translate_tensor_name(t: str) -> str:
match = re.match(r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t)
if match:
nn = match.group(1)
sub_layer = match.group(2)
lora_type = match.group(3)

sub_layer_renamed = HF_SUBLAYER_TO_GGML.get(sub_layer)
if sub_layer_renamed is None:
print(f"Error: unrecognized sub-layer {sub_layer} in tensor {t}")
sys.exit(1)

output_string = (
f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
)
return output_string
else:
print(f"Error: unrecognized tensor {t}")
sys.exit(1)
NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}


def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
Expand All @@ -61,9 +32,7 @@ def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
fout.write(struct.pack("i", int(params["lora_alpha"])))


def write_tensor_header(
self, name: str, shape: Sequence[int], data_type: np.dtype[Any]
) -> None:
def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]) -> None:
sname = name.encode("utf-8")
fout.write(
struct.pack(
Expand All @@ -78,18 +47,27 @@ def write_tensor_header(
fout.seek((fout.tell() + 31) & -32)


if len(sys.argv) != 2:
print(f"Usage: python {sys.argv[0]} <path>")
if len(sys.argv) < 2:
print(f"Usage: python {sys.argv[0]} <path> [arch]")
print(
"Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
)
print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
sys.exit(1)

input_json = os.path.join(sys.argv[1], "adapter_config.json")
input_model = os.path.join(sys.argv[1], "adapter_model.bin")
output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")

model = torch.load(input_model, map_location="cpu")
arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"

if arch_name not in gguf.MODEL_ARCH_NAMES.values():
print(f"Error: unsupported architecture {arch_name}")
sys.exit(1)

arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone

with open(input_json, "r") as f:
params = json.load(f)
Expand Down Expand Up @@ -117,6 +95,7 @@ def write_tensor_header(

write_file_header(fout, params)
for k, v in model.items():
orig_k = k
if k.endswith(".default.weight"):
k = k.replace(".default.weight", ".weight")
if k in ["llama_proj.weight", "llama_proj.bias"]:
Expand All @@ -129,7 +108,32 @@ def write_tensor_header(
v = v.float()

t = v.detach().numpy()
tname = translate_tensor_name(k)

prefix = "base_model.model."
if k.startswith(prefix):
k = k[len(prefix) :]

lora_suffixes = (".lora_A.weight", ".lora_B.weight")
if k.endswith(lora_suffixes):
suffix = k[-len(lora_suffixes[0]):]
k = k[: -len(lora_suffixes[0])]
else:
print(f"Error: unrecognized tensor name {orig_k}")
sys.exit(1)

tname = name_map.get_name(k)
if tname is None:
print(f"Error: could not map tensor name {orig_k}")
print(" Note: the arch parameter must be specified if the model is not llama")
sys.exit(1)

if suffix == ".lora_A.weight":
tname += ".weight.loraA"
elif suffix == ".lora_B.weight":
tname += ".weight.loraB"
else:
assert False

print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
write_tensor_header(fout, tname, t.shape, t.dtype)
t.tofile(fout)
Expand Down
18 changes: 9 additions & 9 deletions examples/baby-llama/baby-llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1258,9 +1258,9 @@ static struct ggml_tensor * forward_lora(
}

static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
assert(logits->n_dims == 2);
assert(probs->n_dims == 2);
assert(best_samples->n_dims == 1);
assert(ggml_is_matrix(logits));
assert(ggml_is_matrix(probs));
assert(ggml_is_vector(best_samples));
assert(logits->ne[1] == best_samples->ne[0]);
assert(logits->ne[0] == probs->ne[0]);
assert(logits->ne[1] == probs->ne[1]);
Expand Down Expand Up @@ -1292,9 +1292,9 @@ static void sample_softmax_batch(
struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs,
struct ggml_tensor * best_samples
) {
GGML_ASSERT(best_samples->n_dims == 2);
GGML_ASSERT(logits->n_dims == 3);
GGML_ASSERT(probs->n_dims == 3);
GGML_ASSERT(ggml_is_matrix(best_samples));
GGML_ASSERT(ggml_is_3d(logits));
GGML_ASSERT(ggml_is_3d(probs));
int n_tokens = best_samples->ne[0];
int n_batch = best_samples->ne[1];
int n_vocab = logits->ne[0];
Expand Down Expand Up @@ -1334,7 +1334,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
}

static void print_matrix(struct ggml_tensor * probs) {
assert(probs->n_dims == 2);
assert(ggml_is_matrix(probs));
for (int i = 0; i < probs->ne[1]; ++i) {
for (int k = 0; k < probs->ne[0]; ++k) {
float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
Expand Down Expand Up @@ -1386,8 +1386,8 @@ static void get_example_targets(int example_id, struct ggml_tensor * tokens_inpu
static void get_example_targets_batch(
struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets
) {
GGML_ASSERT(tokens_input->n_dims == 2);
GGML_ASSERT( targets->n_dims == 3);
GGML_ASSERT(ggml_is_matrix(tokens_input));
GGML_ASSERT(ggml_is_3d(targets));
int n_tokens = tokens_input->ne[0];
int n_batch = tokens_input->ne[1];
GGML_ASSERT(n_tokens == targets->ne[1]);
Expand Down
14 changes: 7 additions & 7 deletions examples/benchmark/benchmark-matmult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,13 @@ int main(int argc, char ** argv) {
const ggml_type qtype = GGML_TYPE_Q4_1;

size_t ctx_size = 0;
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32);
ctx_size += sizex*sizey*ggml_type_sizef(qtype);
ctx_size += sizex*sizey*ggml_type_sizef(qtype);
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizez);
ctx_size += ggml_row_size(qtype, sizex*sizey);
ctx_size += ggml_row_size(qtype, sizex*sizey);
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
ctx_size += 1024*1024*16;

printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));
Expand Down
4 changes: 2 additions & 2 deletions examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
}

static void print_matrix(struct ggml_tensor * probs) {
assert(probs->n_dims == 2);
assert(ggml_is_matrix(probs));
for (int i = 0; i < probs->ne[1]; ++i) {
for (int k = 0; k < probs->ne[0]; ++k) {
float p = get_f32_2d(probs, k, i);
Expand Down Expand Up @@ -639,7 +639,7 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab

static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
int ct;
switch (gg_weights->n_dims){
switch (ggml_n_dims(gg_weights)) {
case 1:
ct = 0;
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++){
Expand Down
2 changes: 1 addition & 1 deletion examples/finetune/finetune.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,7 @@ static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor,
name = ggml_get_name(tensor);
}
uint32_t name_len = strlen(name);
uint32_t nd = tensor->n_dims;
uint32_t nd = ggml_n_dims(tensor);
uint32_t ne[4] = { (uint32_t)tensor->ne[0],
(uint32_t)tensor->ne[1],
(uint32_t)tensor->ne[2],
Expand Down
2 changes: 1 addition & 1 deletion examples/gguf/gguf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ static bool gguf_ex_read_1(const std::string & fname) {

struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);

printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data);
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);

// print first 10 elements
const float * data = (const float *) cur->data;
Expand Down
6 changes: 3 additions & 3 deletions examples/llava/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
ctx_size += padded_size;
if (verbosity >= 3) {
printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, padded_size=%zu, offset=%zu\n", __func__, i,
cur->n_dims, cur->name, tensor_size, padded_size, offset);
ggml_n_dims(cur), cur->name, tensor_size, padded_size, offset);
}
}
}
Expand Down Expand Up @@ -962,7 +962,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
}

// quantize only 2D tensors
quantize &= (cur->n_dims == 2);
quantize &= (ggml_n_dims(cur) == 2);

if (quantize) {
new_type = type;
Expand Down Expand Up @@ -1035,7 +1035,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
fout.put(0);
}

printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), cur->n_dims, quantize,
printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), ggml_n_dims(cur), quantize,
orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
}

Expand Down
3 changes: 2 additions & 1 deletion examples/server/public/completion.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ export async function* llama(prompt, params = {}, config = {}) {
headers: {
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Accept': 'text/event-stream'
'Accept': 'text/event-stream',
...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
},
signal: controller.signal,
});
Expand Down
7 changes: 6 additions & 1 deletion examples/server/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,8 @@
grammar: '',
n_probs: 0, // no completion_probabilities,
image_data: [],
cache_prompt: true
cache_prompt: true,
api_key: ''
})

/* START: Support for storing prompt templates and parameters in browsers LocalStorage */
Expand Down Expand Up @@ -790,6 +791,10 @@
<fieldset>
${IntField({ label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs })}
</fieldset>
<fieldset>
<label for="api_key">API Key</label>
<input type="text" name="api_key" value="${params.value.api_key}" placeholder="Enter API key" oninput=${updateParams} />
</fieldset>
</details>
</form>
`
Expand Down
Loading

0 comments on commit 509ad00

Please sign in to comment.