diff --git a/llama.cpp b/llama.cpp index aa0c362..e4998ef 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1444,7 +1444,7 @@ static bool llama_model_quantize_internal(const std::string & fname_inp, const s return false; } - std::string word; + std::vector<char> word(32); vocab.id_to_token.resize(n_vocab); for (int i = 0; i < n_vocab; i++) { uint32_t len; @@ -1459,10 +1459,10 @@ static bool llama_model_quantize_internal(const std::string & fname_inp, const s finp.read ((char *) &score, sizeof(score)); fout.write((char *) &score, sizeof(score)); - vocab.token_to_id[word] = i; + vocab.token_to_id[word.data()] = i; auto &tok_score = vocab.id_to_token[i]; - tok_score.tok = word; + tok_score.tok = word.data(); tok_score.score = score; } }