
Commit

change deprecated code names
YellowRoseCx committed Apr 12, 2024
1 parent 1929b3b commit 5a4371d
Showing 9 changed files with 73 additions and 73 deletions.
2 changes: 1 addition & 1 deletion otherarch/ggml_v3.c
@@ -3161,7 +3161,7 @@ size_t ggml_v3_row_size(enum ggml_v3_type type, int64_t ne) {
return ggml_v3_type_size(type)*ne/ggml_v3_blck_size(type);
}

-double ggml_v3_type_sizef(enum ggml_v3_type type) {
+double ggml_v3_row_size(enum ggml_v3_type type) {
return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
}

2 changes: 1 addition & 1 deletion otherarch/ggml_v3.h
@@ -666,7 +666,7 @@ extern "C" {
GGML_V3_API size_t ggml_v3_row_size (enum ggml_v3_type type, int64_t ne); // size in bytes for all elements in a row

GGML_V3_DEPRECATED(
-GGML_V3_API double ggml_v3_type_sizef(enum ggml_v3_type type), // ggml_v3_type_size()/ggml_v3_blck_size() as float
+GGML_V3_API double ggml_v3_row_size(enum ggml_v3_type type), // ggml_v3_type_size()/ggml_v3_blck_size() as float
"use ggml_v3_row_size() instead");

GGML_V3_API const char * ggml_v3_type_name(enum ggml_v3_type type);
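For context on the rename above: per the header comment, ggml_v3_type_sizef(type) returns the average bytes per element (ggml_v3_type_size()/ggml_v3_blck_size()) as a double, while the non-deprecated ggml_v3_row_size(type, ne) returns the byte count for ne elements of a row. A minimal sketch of how the two relate at a call site (illustration only, not part of this diff; wtype and n are placeholder variables):

// bytes for n elements of type wtype
size_t bytes_exact  = ggml_v3_row_size(wtype, n);      // two-argument API, integer result
double bytes_approx = n * ggml_v3_type_sizef(wtype);   // deprecated form this commit replaces

The two expressions agree up to rounding for block-quantized types, which is why the size-estimation call sites in the files below can swap one name for the other.
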
38 changes: 19 additions & 19 deletions otherarch/gpt2_v3.cpp
@@ -136,33 +136,33 @@ ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, g
const int kv_heads = hparams.n_head; // 1 if MQA else hparams.n_head
const int kv_dim = kv_heads * head_dim;

-ctx_size += n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // ln_f_g
-ctx_size += n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // ln_f_b
+ctx_size += n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32); // ln_f_g
+ctx_size += n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32); // ln_f_b

-ctx_size += n_vocab*n_embd*ggml_v3_type_sizef(wtype); // wte
-ctx_size += n_ctx*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // wpe
-ctx_size += n_vocab*n_embd*ggml_v3_type_sizef(wtype); // lm_head
+ctx_size += n_vocab*n_embd*ggml_v3_row_size(wtype); // wte
+ctx_size += n_ctx*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32); // wpe
+ctx_size += n_vocab*n_embd*ggml_v3_row_size(wtype); // lm_head

-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_1_g
-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_1_b
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_1_g
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_1_b

-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_2_g
-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_2_b
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_2_g
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_2_b

-ctx_size += n_layer*((n_embd + 2*kv_dim)*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_attn_w // TODO:
-ctx_size += n_layer*( (n_embd + 2*kv_dim)*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_attn_attn_b
+ctx_size += n_layer*((n_embd + 2*kv_dim)*n_embd*ggml_v3_row_size(wtype)); // c_attn_attn_w // TODO:
+ctx_size += n_layer*( (n_embd + 2*kv_dim)*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_attn_attn_b

-ctx_size += n_layer*(n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_proj_w
-ctx_size += n_layer*( n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_attn_proj_b
+ctx_size += n_layer*(n_embd*n_embd*ggml_v3_row_size(wtype)); // c_attn_proj_w
+ctx_size += n_layer*( n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_attn_proj_b

-ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_mlp_fc_w
-ctx_size += n_layer*( 4*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_mlp_fc_b
+ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_row_size(wtype)); // c_mlp_fc_w
+ctx_size += n_layer*( 4*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_mlp_fc_b

-ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_mlp_proj_w
-ctx_size += n_layer*( n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_mlp_proj_b
+ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_row_size(wtype)); // c_mlp_proj_w
+ctx_size += n_layer*( n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_mlp_proj_b

-ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F16); // memory_k
-ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F16); // memory_v
+ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F16); // memory_k
+ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F16); // memory_v

ctx_size += (6 + 12*n_layer)*1024; // object overhead

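The sizing lines above all follow the same pattern: before allocating the ggml context, the loader estimates each weight tensor's footprint as (element count) × (average bytes per element for its type) and accumulates the total into ctx_size, plus a fixed per-object overhead. A hedged sketch of that estimate (the helper name estimate_tensor_bytes is mine, for illustration only; ggml_v3_type_size and ggml_v3_blck_size are the helpers visible in the ggml_v3.c hunk above):

// Illustrative only: one tensor's approximate byte footprint,
// i.e. what n_elements * ggml_v3_type_sizef(type) computed.
static size_t estimate_tensor_bytes(enum ggml_v3_type type, size_t n_elements) {
    return (size_t)(n_elements * (double) ggml_v3_type_size(type) / ggml_v3_blck_size(type));
}

// e.g. the final layer-norm gain of gpt2: an F32 vector of n_embd elements
// ctx_size += estimate_tensor_bytes(GGML_V3_TYPE_F32, n_embd); // ln_f_g
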
34 changes: 17 additions & 17 deletions otherarch/gptj_v3.cpp
@@ -123,31 +123,31 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
const int n_ctx = hparams.n_ctx;
const int n_vocab = hparams.n_vocab;

-ctx_size += n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // ln_f_g
-ctx_size += n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // ln_f_b
+ctx_size += n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32); // ln_f_g
+ctx_size += n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32); // ln_f_b

-ctx_size += n_embd*n_vocab*ggml_v3_type_sizef(wtype); // wte
+ctx_size += n_embd*n_vocab*ggml_v3_row_size(wtype); // wte

-ctx_size += n_embd*n_vocab*ggml_v3_type_sizef(wtype); // lmh_g
-ctx_size += n_vocab*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // lmh_b
+ctx_size += n_embd*n_vocab*ggml_v3_row_size(wtype); // lmh_g
+ctx_size += n_vocab*ggml_v3_row_size(GGML_V3_TYPE_F32); // lmh_b

-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_1_g
-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_1_b
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_1_g
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_1_b

-ctx_size += n_layer*(n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_q_proj_w
-ctx_size += n_layer*(n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_k_proj_w
-ctx_size += n_layer*(n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_v_proj_w
+ctx_size += n_layer*(n_embd*n_embd*ggml_v3_row_size(wtype)); // c_attn_q_proj_w
+ctx_size += n_layer*(n_embd*n_embd*ggml_v3_row_size(wtype)); // c_attn_k_proj_w
+ctx_size += n_layer*(n_embd*n_embd*ggml_v3_row_size(wtype)); // c_attn_v_proj_w

-ctx_size += n_layer*(n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_proj_w
+ctx_size += n_layer*(n_embd*n_embd*ggml_v3_row_size(wtype)); // c_attn_proj_w

-ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_mlp_fc_w
-ctx_size += n_layer*( 4*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_mlp_fc_b
+ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_row_size(wtype)); // c_mlp_fc_w
+ctx_size += n_layer*( 4*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_mlp_fc_b

-ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_mlp_proj_w
-ctx_size += n_layer*( n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_mlp_proj_b
+ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_row_size(wtype)); // c_mlp_proj_w
+ctx_size += n_layer*( n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_mlp_proj_b

-ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_type_sizef(memory_type); // memory_k
-ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_type_sizef(memory_type); // memory_v
+ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_row_size(memory_type); // memory_k
+ctx_size += std::max(origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_row_size(memory_type); // memory_v

ctx_size += (5 + 10*n_layer)*512; // object overhead

2 changes: 1 addition & 1 deletion otherarch/llama-util.h
@@ -214,7 +214,7 @@ struct llama_v3_mmap {

llama_v3_mmap(struct llama_v3_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
size = file->size;
-int fd = fileno(file->fp);
+int fd = _fileno(file->fp);
int flags = MAP_SHARED;
// prefetch/readahead impairs performance on NUMA systems
if (numa) { prefetch = 0; }
2 changes: 1 addition & 1 deletion otherarch/llama_v2-util.h
@@ -156,7 +156,7 @@ struct llama_v2_mmap {

llama_v2_mmap(struct llama_v2_file * file, bool prefetch = true) {
size = file->size;
-int fd = fileno(file->fp);
+int fd = _fileno(file->fp);
int flags = MAP_SHARED;
#ifdef __linux__
flags |= MAP_POPULATE;
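Both mmap helpers above (and the two rwkv_v3.cpp call sites further down) switch from the POSIX name fileno to _fileno, the underscore-prefixed spelling the MSVC C runtime expects; MSVC flags the plain POSIX name as deprecated. For illustration only, one common way portable code reconciles the two spellings is a small wrapper — a sketch under that assumption, not what this commit does (the commit calls _fileno directly):

#include <cstdio>

// Hypothetical helper, not from this repository: pick the MSVC CRT name when
// building with MSVC and the POSIX name elsewhere, so call sites stay identical.
static inline int portable_fileno(std::FILE * fp) {
#ifdef _MSC_VER
    return _fileno(fp);
#else
    return fileno(fp);
#endif
}
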
24 changes: 12 additions & 12 deletions otherarch/mpt_v3.cpp
@@ -126,18 +126,18 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
const size_t n_layer = hparams.n_layers;
const size_t n_vocab = hparams.n_vocab;

-ctx_size += n_embd * n_vocab * ggml_v3_type_sizef(wtype); // wte_weight
-ctx_size += n_embd * ggml_v3_type_sizef(GGML_V3_TYPE_F32); // norm_f_weight
-
-ctx_size += n_layer * (n_embd * ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_1_weight
-ctx_size += n_layer * (3 * n_embd * n_embd * ggml_v3_type_sizef(wtype)); // attn_Wqkv_weight
-ctx_size += n_layer * (n_embd * n_embd * ggml_v3_type_sizef(wtype)); // attn_out_proj_weight
-ctx_size += n_layer * (n_embd * ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_2_weight
-ctx_size += n_layer * (4 * n_embd * n_embd * ggml_v3_type_sizef(wtype)); // mlp_mlp_up_weight
-ctx_size += n_layer * (n_embd * n_embd * 4 * ggml_v3_type_sizef(wtype)); // mlp_mlp_down_weight
-
-ctx_size += n_ctx * n_layer * n_embd * ggml_v3_type_sizef(GGML_V3_TYPE_F16); // memory_k
-ctx_size += n_ctx * n_layer * n_embd * ggml_v3_type_sizef(GGML_V3_TYPE_F16); // memory_v
+ctx_size += n_embd * n_vocab * ggml_v3_row_size(wtype); // wte_weight
+ctx_size += n_embd * ggml_v3_row_size(GGML_V3_TYPE_F32); // norm_f_weight
+
+ctx_size += n_layer * (n_embd * ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_1_weight
+ctx_size += n_layer * (3 * n_embd * n_embd * ggml_v3_row_size(wtype)); // attn_Wqkv_weight
+ctx_size += n_layer * (n_embd * n_embd * ggml_v3_row_size(wtype)); // attn_out_proj_weight
+ctx_size += n_layer * (n_embd * ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_2_weight
+ctx_size += n_layer * (4 * n_embd * n_embd * ggml_v3_row_size(wtype)); // mlp_mlp_up_weight
+ctx_size += n_layer * (n_embd * n_embd * 4 * ggml_v3_row_size(wtype)); // mlp_mlp_down_weight
+
+ctx_size += n_ctx * n_layer * n_embd * ggml_v3_row_size(GGML_V3_TYPE_F16); // memory_k
+ctx_size += n_ctx * n_layer * n_embd * ggml_v3_row_size(GGML_V3_TYPE_F16); // memory_v

ctx_size += (6 + 6 * n_layer) * 512; // object overhead

38 changes: 19 additions & 19 deletions otherarch/neox_v3.cpp
@@ -115,34 +115,34 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
const size_t n_ctx = hparams.n_ctx;
const size_t n_vocab = hparams.n_vocab;

-ctx_size += n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // ln_f_g
-ctx_size += n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // ln_f_b
+ctx_size += n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32); // ln_f_g
+ctx_size += n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32); // ln_f_b

-ctx_size += n_embd*n_vocab*ggml_v3_type_sizef(wtype); // wte
+ctx_size += n_embd*n_vocab*ggml_v3_row_size(wtype); // wte

-ctx_size += n_embd*n_vocab*ggml_v3_type_sizef(wtype); // lmh_g
-//ctx_size += n_vocab*ggml_v3_type_sizef(GGML_V3_TYPE_F32); // lmh_b
+ctx_size += n_embd*n_vocab*ggml_v3_row_size(wtype); // lmh_g
+//ctx_size += n_vocab*ggml_v3_row_size(GGML_V3_TYPE_F32); // lmh_b

-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_1_g
-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_1_b
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_1_g
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_1_b

-ctx_size += n_layer*(3*n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_attn_w
-ctx_size += n_layer*( 3*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_attn_attn_b
+ctx_size += n_layer*(3*n_embd*n_embd*ggml_v3_row_size(wtype)); // c_attn_attn_w
+ctx_size += n_layer*( 3*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_attn_attn_b

-ctx_size += n_layer*(n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_attn_proj_w
-ctx_size += n_layer*(n_embd*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_attn_proj_b
+ctx_size += n_layer*(n_embd*n_embd*ggml_v3_row_size(wtype)); // c_attn_proj_w
+ctx_size += n_layer*(n_embd*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_attn_proj_b

-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_2_g
-ctx_size += n_layer*(n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // ln_2_b
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_2_g
+ctx_size += n_layer*(n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // ln_2_b

-ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_mlp_fc_w
-ctx_size += n_layer*( 4*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_mlp_fc_b
+ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_row_size(wtype)); // c_mlp_fc_w
+ctx_size += n_layer*( 4*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_mlp_fc_b

-ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_type_sizef(wtype)); // c_mlp_proj_w
-ctx_size += n_layer*( n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F32)); // c_mlp_proj_b
+ctx_size += n_layer*(4*n_embd*n_embd*ggml_v3_row_size(wtype)); // c_mlp_proj_w
+ctx_size += n_layer*( n_embd*ggml_v3_row_size(GGML_V3_TYPE_F32)); // c_mlp_proj_b

-ctx_size += std::max((size_t)origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F16); // memory_k
-ctx_size += std::max((size_t)origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_type_sizef(GGML_V3_TYPE_F16); // memory_v
+ctx_size += std::max((size_t)origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F16); // memory_k
+ctx_size += std::max((size_t)origmaxctx,n_ctx)*n_layer*n_embd*ggml_v3_row_size(GGML_V3_TYPE_F16); // memory_v

ctx_size += (6 + 16*n_layer)*1024; // object overhead

4 changes: 2 additions & 2 deletions otherarch/rwkv_v3.cpp
@@ -1378,7 +1378,7 @@ bool rwkv_instance_from_file(const char * file_path, struct rwkv_instance & inst

RWKV_ASSERT_NULL_MSG(RWKV_ERROR_FILE | RWKV_ERROR_FILE_OPEN, file.file, "Failed to open file %s", file_path);
// Be very careful when changing this code. It must support files larger than 2 GB by using 64-bit functions to get the file length.
-RWKV_ASSERT_NULL_MSG(RWKV_ERROR_FILE | RWKV_ERROR_FILE_STAT, fstat(fileno(file.file), &file_stat) == 0, "Failed to stat file %s", file_path);
+RWKV_ASSERT_NULL_MSG(RWKV_ERROR_FILE | RWKV_ERROR_FILE_STAT, fstat(_fileno(file.file), &file_stat) == 0, "Failed to stat file %s", file_path);
RWKV_ASSERT_NULL_MSG(RWKV_ERROR_FILE, rwkv_fread_file_header(file.file, model.header), "Invalid file header");

struct rwkv_tensor_header tensor_header;
@@ -1799,7 +1799,7 @@ bool rwkv_quantize_model_file(const char * in_path, const char * out_path, const
RWKV_ASSERT_FALSE_MSG(RWKV_ERROR_FILE | RWKV_ERROR_FILE_OPEN, in_file.file, "Failed to open %s for reading", in_path);

// Be very careful when changing this code. It must support files larger than 2 GB by using 64-bit functions to the get file length.
-RWKV_ASSERT_FALSE_MSG(RWKV_ERROR_FILE | RWKV_ERROR_FILE_STAT, fstat(fileno(in_file.file), &in_stat) == 0, "failed to stat file %s", in_path);
+RWKV_ASSERT_FALSE_MSG(RWKV_ERROR_FILE | RWKV_ERROR_FILE_STAT, fstat(_fileno(in_file.file), &in_stat) == 0, "failed to stat file %s", in_path);

struct rwkv_file out_file(fopen(out_path, "wb"));
RWKV_ASSERT_FALSE_MSG(RWKV_ERROR_FILE | RWKV_ERROR_FILE_OPEN, out_file.file, "Failed to open %s for writing", out_path);
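The surrounding comments stress using 64-bit calls so files larger than 2 GB are measured correctly. As a hedged sketch only (not code from this commit): on Windows that typically means pairing _fileno with the 64-bit stat variants, while POSIX builds can keep fstat with large-file support.

#include <cstdio>
#include <sys/stat.h>

// Illustrative helper, assuming MSVC's _fstat64/__stat64 on Windows and a
// 64-bit off_t elsewhere; returns -1 on failure.
static long long file_size_64(std::FILE * fp) {
#ifdef _WIN32
    struct __stat64 st;
    if (_fstat64(_fileno(fp), &st) != 0) return -1;
    return (long long) st.st_size;
#else
    struct stat st;
    if (fstat(fileno(fp), &st) != 0) return -1;
    return (long long) st.st_size;
#endif
}
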
