|
|
|
@ -395,6 +395,8 @@ struct llama_buffer {
|
|
|
|
|
uint8_t * addr = NULL;
|
|
|
|
|
size_t size = 0;
|
|
|
|
|
|
|
|
|
|
llama_buffer() = default;
|
|
|
|
|
|
|
|
|
|
void resize(size_t size) {
|
|
|
|
|
delete[] addr;
|
|
|
|
|
addr = new uint8_t[size];
|
|
|
|
@ -404,27 +406,59 @@ struct llama_buffer {
|
|
|
|
|
~llama_buffer() {
|
|
|
|
|
delete[] addr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// disable copy and move
|
|
|
|
|
llama_buffer(const llama_buffer&) = delete;
|
|
|
|
|
llama_buffer(llama_buffer&&) = delete;
|
|
|
|
|
llama_buffer& operator=(const llama_buffer&) = delete;
|
|
|
|
|
llama_buffer& operator=(llama_buffer&&) = delete;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#ifdef GGML_USE_CUBLAS
|
|
|
|
|
#include "ggml-cuda.h"
|
|
|
|
|
struct llama_ctx_buffer {
|
|
|
|
|
uint8_t * addr = NULL;
|
|
|
|
|
bool is_cuda;
|
|
|
|
|
size_t size = 0;
|
|
|
|
|
|
|
|
|
|
llama_ctx_buffer() = default;
|
|
|
|
|
|
|
|
|
|
void resize(size_t size) {
|
|
|
|
|
free();
|
|
|
|
|
|
|
|
|
|
addr = (uint8_t *) ggml_cuda_host_malloc(size);
|
|
|
|
|
if (addr) {
|
|
|
|
|
ggml_cuda_host_free(addr);
|
|
|
|
|
is_cuda = true;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
// fall back to pageable memory
|
|
|
|
|
addr = new uint8_t[size];
|
|
|
|
|
is_cuda = false;
|
|
|
|
|
}
|
|
|
|
|
addr = (uint8_t *) ggml_cuda_host_malloc(size);
|
|
|
|
|
this->size = size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
~llama_ctx_buffer() {
|
|
|
|
|
void free() {
|
|
|
|
|
if (addr) {
|
|
|
|
|
ggml_cuda_host_free(addr);
|
|
|
|
|
if (is_cuda) {
|
|
|
|
|
ggml_cuda_host_free(addr);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
delete[] addr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
addr = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
~llama_ctx_buffer() {
|
|
|
|
|
free();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// disable copy and move
|
|
|
|
|
llama_ctx_buffer(const llama_ctx_buffer&) = delete;
|
|
|
|
|
llama_ctx_buffer(llama_ctx_buffer&&) = delete;
|
|
|
|
|
llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
|
|
|
|
|
llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
|
|
|
|
|
};
|
|
|
|
|
#else
|
|
|
|
|
typedef llama_buffer llama_ctx_buffer;
|
|
|
|
|