diff --git a/llama.cpp b/llama.cpp
index aa0c362..e4998ef 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1444,7 +1444,7 @@ static bool llama_model_quantize_internal(const std::string & fname_inp, const s
             return false;
         }
 
-        std::string word;
+        std::vector<char> word(32);
         vocab.id_to_token.resize(n_vocab);
         for (int i = 0; i < n_vocab; i++) {
             uint32_t len;
@@ -1459,10 +1459,10 @@ static bool llama_model_quantize_internal(const std::string & fname_inp, const s
             finp.read ((char *) &score, sizeof(score));
             fout.write((char *) &score, sizeof(score));
 
-            vocab.token_to_id[word] = i;
+            vocab.token_to_id[word.data()] = i;
 
             auto &tok_score = vocab.id_to_token[i];
-            tok_score.tok = word;
+            tok_score.tok = word.data();
             tok_score.score = score;
         }
     }