You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
llama.cpp/ggml-opencl-dequant.cl

64 lines
1.4 KiB
Common Lisp

#define MULTILINE_QUOTE(...) #__VA_ARGS__
const char * clblast_dequant = MULTILINE_QUOTE(
struct block_q4_0
{
float d;
uchar qs[16];
};
__kernel void dequantize_row_q4_0(__global struct block_q4_0* blocks, __global float* result) {
const uint i = get_global_id(0) / 32;
const uint l = get_local_id(0);
const float d = blocks[i].d;
const uchar vi = blocks[i].qs[l];
const uint index = i*32 + l*2;
result[index + 0] = ((vi & 0xf) - 8)*d;
result[index + 1] = ((vi >> 4) - 8)*d;
}
struct block_q4_1
{
float d;
float m;
uchar qs[16];
};
__kernel void dequantize_row_q4_1(__global struct block_q4_1* blocks, __global float* result) {
const uint i = get_global_id(0) / 32;
const uint l = get_local_id(0);
const float d = blocks[i].d;
const float m = blocks[i].m;
const uchar vi = blocks[i].qs[l];
const uint index = i*32 + l*2;
result[index + 0] = (vi & 0xf) * d + m;
result[index + 1] = (vi >> 4) * d + m;
}
struct block_q4_2
{
ushort d;
uchar qs[8];
};
__kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global float* result) {
const uint i = get_global_id(0) / 16;
const uint l = get_local_id(0);
const float d = vload_half(0, (__global half*) &blocks[i].d);;
const uchar vi = blocks[i].qs[l];
const uint index = i*16 + l*2;
result[index + 0] = ((vi & 0xf) - 8)*d;
result[index + 1] = ((vi >> 4) - 8)*d;
}
);