diff --git a/ggml.c b/ggml.c index 3b38eaa..431cdb9 100644 --- a/ggml.c +++ b/ggml.c @@ -19,6 +19,7 @@ #include #include #include +#include // if C99 - static_assert is noop // ref: https://stackoverflow.com/a/53923785/4039976 @@ -1135,12 +1136,94 @@ static void quantize_row_q4_2_reference(const float * restrict x, block_q4_2 * r } } +static inline int nearest_int(float fval) { + assert(fval <= 4194303.f); + float val = fval + 12582912.f; + int i; memcpy(&i, &val, sizeof(int)); + return (i & 0x007fffff) - 0x00400000; +} + +static float kquantize_q4_with_bounds(int n, int nmin, int nmax, const float * restrict X, int nCandidates, + const float * restrict candidates, int8_t * restrict L) { + assert (nmin >= INT8_MIN); + assert (nmax <= INT8_MAX); + float amax = 0; + for (int i=0; i sumlxM2*suml2P) { + if (sumlxP2 > best*suml2P) { + best = sumlxP2/suml2P; bestScale = iscale; + } + } else { + if (sumlxM2 > best*suml2M) { + best = sumlxM2/suml2M; bestScale = -iscale; + } + } + } + float sumlx = 0; int suml2 = 0; + for (int i=0; i