diff --git a/.gitignore b/.gitignore index ec7c505..7e9e7a2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,410 +24,3 @@ arm_neon.h compile_commands.json CMakeFiles/ CMakeCache.txt - -# Visual Studio stuff -*.exe -*.sln -*.vcxproj -*.vcxproj.filters -cmake_install.cmake -*.dir/ - -## Ignore Visual Studio temporary files, build results, and -## files generated by popular Visual Studio add-ons. -## -## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore - -# User-specific files -*.rsuser -*.suo -*.user -*.userosscache -*.sln.docstates - -# User-specific files (MonoDevelop/Xamarin Studio) -*.userprefs - -# Mono auto generated files -mono_crash.* - -# Build results -[Dd]ebug/ -[Dd]ebugPublic/ -[Rr]elease/ -[Rr]eleases/ -x64/ -x86/ -[Ww][Ii][Nn]32/ -[Aa][Rr][Mm]/ -[Aa][Rr][Mm]64/ -bld/ -[Bb]in/ -[Oo]bj/ -[Ll]og/ -[Ll]ogs/ - -# Visual Studio 2015/2017 cache/options directory -.vs/ -# Uncomment if you have tasks that create the project's static files in wwwroot -#wwwroot/ - -# Visual Studio 2017 auto generated files -Generated\ Files/ - -# MSTest test Results -[Tt]est[Rr]esult*/ -[Bb]uild[Ll]og.* - -# NUnit -*.VisualState.xml -TestResult.xml -nunit-*.xml - -# Build Results of an ATL Project -[Dd]ebugPS/ -[Rr]eleasePS/ -dlldata.c - -# Benchmark Results -BenchmarkDotNet.Artifacts/ - -# .NET Core -project.lock.json -project.fragment.lock.json -artifacts/ - -# ASP.NET Scaffolding -ScaffoldingReadMe.txt - -# StyleCop -StyleCopReport.xml - -# Files built by Visual Studio -*_i.c -*_p.c -*_h.h -*.ilk -*.meta -*.obj -*.iobj -*.pch -*.pdb -*.ipdb -*.pgc -*.pgd -*.rsp -*.sbr -*.tlb -*.tli -*.tlh -*.tmp -*.tmp_proj -*_wpftmp.csproj -*.log -*.tlog -*.vspscc -*.vssscc -.builds -*.pidb -*.svclog -*.scc - -# Chutzpah Test files -_Chutzpah* - -# Visual C++ cache files -ipch/ -*.aps -*.ncb -*.opendb -*.opensdf -*.sdf -*.cachefile -*.VC.db -*.VC.VC.opendb - -# Visual Studio profiler -*.psess -*.vsp -*.vspx -*.sap - -# Visual Studio Trace Files -*.e2e - -# TFS 2012 Local Workspace -$tf/ - -# Guidance Automation Toolkit -*.gpState - -# ReSharper is a .NET coding add-in -_ReSharper*/ -*.[Rr]e[Ss]harper -*.DotSettings.user - -# TeamCity is a build add-in -_TeamCity* - -# DotCover is a Code Coverage Tool -*.dotCover - -# AxoCover is a Code Coverage Tool -.axoCover/* -!.axoCover/settings.json - -# Coverlet is a free, cross platform Code Coverage Tool -coverage*.json -coverage*.xml -coverage*.info - -# Visual Studio code coverage results -*.coverage -*.coveragexml - -# NCrunch -_NCrunch_* -.*crunch*.local.xml -nCrunchTemp_* - -# MightyMoose -*.mm.* -AutoTest.Net/ - -# Web workbench (sass) -.sass-cache/ - -# Installshield output folder -[Ee]xpress/ - -# DocProject is a documentation generator add-in -DocProject/buildhelp/ -DocProject/Help/*.HxT -DocProject/Help/*.HxC -DocProject/Help/*.hhc -DocProject/Help/*.hhk -DocProject/Help/*.hhp -DocProject/Help/Html2 -DocProject/Help/html - -# Click-Once directory -publish/ - -# Publish Web Output -*.[Pp]ublish.xml -*.azurePubxml -# Note: Comment the next line if you want to checkin your web deploy settings, -# but database connection strings (with potential passwords) will be unencrypted -*.pubxml -*.publishproj - -# Microsoft Azure Web App publish settings. Comment the next line if you want to -# checkin your Azure Web App publish settings, but sensitive information contained -# in these scripts will be unencrypted -PublishScripts/ - -# NuGet Packages -*.nupkg -# NuGet Symbol Packages -*.snupkg -# The packages folder can be ignored because of Package Restore -**/[Pp]ackages/* -# except build/, which is used as an MSBuild target. -!**/[Pp]ackages/build/ -# Uncomment if necessary however generally it will be regenerated when needed -#!**/[Pp]ackages/repositories.config -# NuGet v3's project.json files produces more ignorable files -*.nuget.props -*.nuget.targets - -# Microsoft Azure Build Output -csx/ -*.build.csdef - -# Microsoft Azure Emulator -ecf/ -rcf/ - -# Windows Store app package directories and files -AppPackages/ -BundleArtifacts/ -Package.StoreAssociation.xml -_pkginfo.txt -*.appx -*.appxbundle -*.appxupload - -# Visual Studio cache files -# files ending in .cache can be ignored -*.[Cc]ache -# but keep track of directories ending in .cache -!?*.[Cc]ache/ - -# Others -ClientBin/ -~$* -*~ -*.dbmdl -*.dbproj.schemaview -*.jfm -*.pfx -*.publishsettings -orleans.codegen.cs - -# Including strong name files can present a security risk -# (https://github.com/github/gitignore/pull/2483#issue-259490424) -#*.snk - -# Since there are multiple workflows, uncomment next line to ignore bower_components -# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) -#bower_components/ - -# RIA/Silverlight projects -Generated_Code/ - -# Backup & report files from converting an old project file -# to a newer Visual Studio version. Backup files are not needed, -# because we have git ;-) -_UpgradeReport_Files/ -Backup*/ -UpgradeLog*.XML -UpgradeLog*.htm -ServiceFabricBackup/ -*.rptproj.bak - -# SQL Server files -*.mdf -*.ldf -*.ndf - -# Business Intelligence projects -*.rdl.data -*.bim.layout -*.bim_*.settings -*.rptproj.rsuser -*- [Bb]ackup.rdl -*- [Bb]ackup ([0-9]).rdl -*- [Bb]ackup ([0-9][0-9]).rdl - -# Microsoft Fakes -FakesAssemblies/ - -# GhostDoc plugin setting file -*.GhostDoc.xml - -# Node.js Tools for Visual Studio -.ntvs_analysis.dat -node_modules/ - -# Visual Studio 6 build log -*.plg - -# Visual Studio 6 workspace options file -*.opt - -# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) -*.vbw - -# Visual Studio 6 auto-generated project file (contains which files were open etc.) -*.vbp - -# Visual Studio 6 workspace and project file (working project files containing files to include in project) -*.dsw -*.dsp - -# Visual Studio 6 technical files -*.ncb -*.aps - -# Visual Studio LightSwitch build output -**/*.HTMLClient/GeneratedArtifacts -**/*.DesktopClient/GeneratedArtifacts -**/*.DesktopClient/ModelManifest.xml -**/*.Server/GeneratedArtifacts -**/*.Server/ModelManifest.xml -_Pvt_Extensions - -# Paket dependency manager -.paket/paket.exe -paket-files/ - -# FAKE - F# Make -.fake/ - -# CodeRush personal settings -.cr/personal - -# Python Tools for Visual Studio (PTVS) -__pycache__/ -*.pyc - -# Cake - Uncomment if you are using it -# tools/** -# !tools/packages.config - -# Tabs Studio -*.tss - -# Telerik's JustMock configuration file -*.jmconfig - -# BizTalk build output -*.btp.cs -*.btm.cs -*.odx.cs -*.xsd.cs - -# OpenCover UI analysis results -OpenCover/ - -# Azure Stream Analytics local run output -ASALocalRun/ - -# MSBuild Binary and Structured Log -*.binlog - -# NVidia Nsight GPU debugger configuration file -*.nvuser - -# MFractors (Xamarin productivity tool) working folder -.mfractor/ - -# Local History for Visual Studio -.localhistory/ - -# Visual Studio History (VSHistory) files -.vshistory/ - -# BeatPulse healthcheck temp database -healthchecksdb - -# Backup folder for Package Reference Convert tool in Visual Studio 2017 -MigrationBackup/ - -# Ionide (cross platform F# VS Code tools) working folder -.ionide/ - -# Fody - auto-generated XML schema -FodyWeavers.xsd - -# VS Code files for those working on multiple tools -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -*.code-workspace - -# Local History for Visual Studio Code -.history/ - -# Windows Installer files from build outputs -*.cab -*.msi -*.msix -*.msm -*.msp - -# JetBrains Rider -*.sln.iml diff --git a/CMakeLists.txt b/CMakeLists.txt index ca3be38..24213b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,9 @@ endif() add_executable(llama main.cpp utils.cpp - utils.h) + utils.h + mmap.c + mmap.h) add_executable(quantize quantize.cpp diff --git a/Makefile b/Makefile index 56858b3..044e7d7 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,8 @@ endif # Compile flags # -CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -g -fno-omit-frame-pointer -CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC -g -fno-omit-frame-pointer +CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC +CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC LDFLAGS = # OS specific @@ -185,14 +185,17 @@ default: main quantize ggml.o: ggml.c ggml.h $(CC) $(CFLAGS) -c ggml.c -o ggml.o +mmap.o: mmap.c mmap.h + $(CC) $(CFLAGS) -c mmap.c -o mmap.o + utils.o: utils.cpp utils.h $(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o clean: rm -f *.o main quantize -main: main.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS) +main: main.cpp ggml.o utils.o mmap.o + $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o mmap.o -o main $(LDFLAGS) ./main -h quantize: quantize.cpp ggml.o utils.o diff --git a/ggml.c b/ggml.c index 59a8855..0c7aee7 100644 --- a/ggml.c +++ b/ggml.c @@ -2437,7 +2437,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { *ctx = (struct ggml_context) { /*.mem_size =*/ params.mem_size, - /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : _malloc(params.mem_size), + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size), /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, /*.n_objects =*/ 0, /*.objects_begin =*/ NULL, @@ -2469,7 +2469,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size); if (ctx->mem_buffer_owned) { - _free(ctx->mem_buffer); + free(ctx->mem_buffer); } found = true; diff --git a/ggml.h b/ggml.h index 38b991f..7ce655c 100644 --- a/ggml.h +++ b/ggml.h @@ -183,9 +183,6 @@ extern "C" { #define GGML_MAX_CONTEXTS 64 #define GGML_MAX_OPT 4 -void* _malloc(size_t n); -void _free(void* p); - #ifdef __ARM_NEON // we use the built-in 16-bit float type typedef __fp16 ggml_fp16_t; diff --git a/main.cpp b/main.cpp index 9e8087a..a82ace4 100644 --- a/main.cpp +++ b/main.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -23,10 +24,6 @@ #include #include #include -#else -#include -#define msync(addr, len_bytes, flag) winMSync -#define MS_ASYNC 0 #endif #define ROUNDUP(X, K) (((X) + (K)-1) & -(K)) @@ -34,7 +31,7 @@ #define MAGIC_PATH "magic.dat" #define MAGIC_ADDR (char *)0x330000000000 -#define MAGIC_GRAN 2097152 +#define MAGIC_GRAN 65536 #define MAGIC_ALGN (sizeof(size_t) * 2) #define ANSI_COLOR_RED "\x1b[31m" @@ -104,49 +101,21 @@ struct llama_model { std::map tensors; }; - struct magic { uint32_t magic; std::atomic lock; int fd; - size_t commit; - size_t offset; - size_t capacity; - gpt_vocab* vocab; - llama_model* model; + uint64_t commit; + uint64_t offset; + uint64_t capacity; + gpt_vocab *vocab; + llama_model *model; }; -static void winMSync(magic* addr, size_t len_bytes) { - bool success = FlushViewOfFile((void*)addr, len_bytes); - if (!success) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("failed with error %d: %s"), - error_code, lpMsgBuf); - } - HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd); - FlushFileBuffers(hFile); -} - - static struct magic *mag; static inline void spin_lock(std::atomic &lock) { - while (!lock.exchange(1, std::memory_order_acquire)); + while (lock.exchange(1, std::memory_order_acquire)); } static inline void spin_unlock(std::atomic &lock) { @@ -162,62 +131,64 @@ static void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t } static void magic_commit(void) { - mag->offset = mag->capacity; - mag->commit = mag->capacity; + mag->commit = ROUNDUP(mag->offset, MAGIC_GRAN); mag->magic = 0xFEEDABEE; - bool success = msync(mag, mag->commit, MS_ASYNC); + if (msync(mag, mag->commit, MS_ASYNC) == -1) { + perror("msync"); + exit(77); + } } static void magic_init(void) { int fd; size_t n; -#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) - struct stat st; -#else - struct _stat64 st; -#endif + int64_t size; if (mag) return; n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN); if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) { - int result = fstat(fd, &st); - int error = errno; - if (errno == EBADF) - fprintf(stderr, "Bad file descriptor.\n"); - else if (errno == EINVAL) - fprintf(stderr, "Invalid argument to _fstat.\n"); - if (st.st_size >= n) { + if ((size = lseek(fd, 0, SEEK_END)) == -1) { + perror("lseek"); + exit(77); + } + if (size >= n) { mag = (struct magic *)Mmap(MAGIC_ADDR, n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, fd, 0); if (mag->magic == 0xFEEDABEE) { - mag = (struct magic *)Mmap(MAGIC_ADDR, mag->capacity, + mag = (struct magic *)Mmap(MAGIC_ADDR, mag->commit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, fd, 0); madvise(MAGIC_ADDR, mag->capacity, MADV_WILLNEED); - ftruncate(fd, mag->commit); mag->offset = mag->commit; mag->capacity = mag->commit; mag->fd = -1; return; } } - ftruncate(fd, 0); + if (ftruncate(fd, 0) == -1) { + perror("ftruncate"); + exit(77); + } } else if ((fd = open(MAGIC_PATH, O_RDWR | O_CREAT | O_TRUNC, 0644)) == -1) { perror(MAGIC_PATH); exit(77); } - ftruncate(fd, n); + if (ftruncate(fd, n) == -1) { + perror("ftruncate"); + exit(77); + } mag = (struct magic *)Mmap(MAGIC_ADDR, n, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); - mag->offset = MAGIC_GRAN; + mag->offset = n; + mag->capacity = n; mag->fd = fd; } -void *memalign(size_t a, size_t n) { +void *magic_memalign(size_t a, size_t n) { void *p; - size_t i, j, k, m; static int count; + size_t i, j, k, m, c2; magic_init(); if (a < MAGIC_ALGN) a = MAGIC_ALGN; while (!IS2POW(a)) ++a; @@ -227,24 +198,37 @@ void *memalign(size_t a, size_t n) { i = i + sizeof(size_t); i = ROUNDUP(i, a); j = ROUNDUP(i + m, MAGIC_GRAN); - //if (j > mag->capacity) { + if (j > mag->capacity) { + c2 = mag->capacity; + if (!c2) { + c2 = MAGIC_GRAN; + } + while (j > c2) { + c2 += c2 >> 4; + c2 = ROUNDUP(c2, MAGIC_GRAN); + } if (!mag->magic) { - int result = ftruncate(mag->fd, j); + if (ftruncate(mag->fd, c2) == -1) { + perror("ftruncate"); + spin_unlock(mag->lock); + return 0; + } p = mmap(MAGIC_ADDR + mag->capacity, - j - mag->capacity, PROT_READ | PROT_WRITE, + c2 - mag->capacity, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity); } else { p = mmap(MAGIC_ADDR + mag->capacity, - j - mag->capacity, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + c2 - mag->capacity, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); } if (p != MAP_FAILED) { - mag->capacity = j; + mag->capacity = c2; } else { + perror("mmap"); spin_unlock(mag->lock); return 0; } - //} + } mag->offset = i + m; spin_unlock(mag->lock); p = MAGIC_ADDR + i; @@ -252,60 +236,44 @@ void *memalign(size_t a, size_t n) { return p; } -void *_malloc(size_t n) { - return memalign(MAGIC_ALGN, n); -} - -size_t malloc_usable_size(const void *p) { - return ((const size_t *)p)[-1]; +void *magic_malloc(size_t n) { + return magic_memalign(MAGIC_ALGN, n); } -void *_calloc(size_t n, size_t z) { +void *magic_calloc(size_t n, size_t z) { void *p; - if ((p = _malloc((n *= z)))) { + if ((p = magic_malloc((n *= z)))) { memset(p, 0, n); } return p; } -void _free(void *p) { +void magic_free(void *p) { // do nothing } -void *_realloc(void *p, size_t n) { +void *magic_realloc(void *p, size_t n) { void *q; if (!p) { - return _malloc(n); + return magic_malloc(n); } if (!n) { - _free(p); + magic_free(p); return 0; } - if ((q = _malloc(n))) { + if ((q = magic_malloc(n))) { memcpy(q, p, ((const size_t *)p)[-1]); } return q; } -#if defined(malloc) -# undef malloc -#endif -#define malloc(x) _malloc(x) - -#if defined(calloc) -# undef calloc -#endif -#define calloc(x) _calloc(x) - -#if defined(realloc) -# undef realloc -#endif -#define realloc(x) _realloc(x) +void* operator new(size_t size) { + return magic_malloc(size); +} -#if defined(free) -# undef free -#endif -#define free(x) _free(x) +void operator delete(void* p) { + magic_free(p); +} // load the model's weights from a file bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) { @@ -451,7 +419,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab { struct ggml_init_params params = { /*.mem_size =*/ ctx_size, - /*.mem_buffer =*/ NULL, + /*.mem_buffer =*/ magic_malloc(ctx_size), }; model.ctx = ggml_init(params); @@ -772,7 +740,7 @@ bool llama_eval( const int d_key = n_embd/n_head; static size_t buf_size = 512u*1024*1024; - static void * buf = _malloc(buf_size); + static void * buf = malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N > buf_size) { const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead @@ -780,7 +748,7 @@ bool llama_eval( // reallocate buf_size = buf_size_new; - buf = _realloc(buf, buf_size); + buf = realloc(buf, buf_size); if (buf == nullptr) { fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); return false; diff --git a/mmap.c b/mmap.c new file mode 100644 index 0000000..d9ceb33 --- /dev/null +++ b/mmap.c @@ -0,0 +1,570 @@ +// Lightweight Portable mmap() Polyfill +// +// 1. Supports POSIX.1 +// +// The baseline POSIX standard doesn't specify MAP_ANONYMOUS. This +// library makes sure, on the hypothetical UNIX systems that don't +// have it, or on the mainstream UNIX platforms where the user has +// chosen to define _POSIX_C_SOURCE that cause headers to undefine +// it, this implementation will fallback to creating a secure temp +// file, for each anonymous mapping. +// +// 2. Supports Windows w/ Visual Studio +// +// On Windows Vista and later an API exists that's almost as good as +// mmap(). However code that uses this library should conform to the +// subset of behaviors Microsoft accommodates. +// +// Caveats +// +// - You should just assume the page size is 64kb. That's how it is on +// Windows and it usually goes faster to assume that elsewhere too. +// +// - Not designed to support mprotect() at the moment. In order to +// support this, we'd need to consider _open(O_ACCMODE) on Windows +// and then have mmap() be more greedy about permissions. +// +// - There's limited support for being clever with memory intervals. +// For example, you can't punch a hole in a memory map on Windows. +// This abstraction does aim to offer more flexibility than WIN32. +// There should also be good error reporting for unsupported uses. + +#include "mmap.h" + +#ifdef NEED_POSIX_MMAP +#include + +static void *PosixMmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { + int tfd; + void* res; + char path[] = "/tmp/llama.dat.XXXXXX"; + if (~flags & MAP_ANONYMOUS) { + res = mmap(addr, length, prot, flags, fd, offset); + } else if ((tfd = mkstemp(path)) != -1) { + unlink(path); + if (!ftruncate(tfd, length)) { + res = mmap(addr, length, prot, flags & ~MAP_ANONYMOUS, tfd, 0); + } else { + res = MAP_FAILED; + } + close(tfd); + } else { + res = MAP_FAILED; + } + return res; +} + +#elif defined(NEED_WIN32_MMAP) +#include +#include +#include +#include + +struct WinMap { // O(n) no ordering no overlaps + HANDLE hand; // zero means array slots empty + HANDLE fand; // for the original file, or -1 + uintptr_t addr; // base address (64 kb aligned) + uintptr_t length; // byte size (>0, rounded 64kb) +}; + +struct WinMaps { + int n; + struct WinMap *p; + volatile long lock; +}; + +static struct WinMaps g_winmaps; + +static inline uintptr_t Min(uintptr_t x, uintptr_t y) { + return y > x ? x : y; +} + +static inline uintptr_t Max(uintptr_t x, uintptr_t y) { + return y < x ? x : y; +} + +static inline uintptr_t Roundup(uintptr_t x, intptr_t a) { + assert(a > 0); + assert(!(a & (a - 1))); + return (x + (a - 1)) & -a; +} + +static inline void Lock(void) { + long x; + for (;;) { + x = InterlockedExchange(&g_winmaps.lock, 1); + if (!x) break; + assert(x == 1); + } +} + +static inline void Unlock(void) { + assert(g_winmaps.lock == 1); + g_winmaps.lock = 0; +} + +static int WinStrerror(int err, char *buf, int size) { + return FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + buf, size, NULL); +} + +#ifdef NDEBUG +#define LogError(thing) (void)0 +#else +static void LogError(const char* file, int line, const char* thing) { +#define LogError(thing) LogError(__FILE__, __LINE__, thing) + fprintf(stderr, "%s:%d: error: %s\n", file, line, thing); +} +#endif + +#ifdef NDEBUG +#define LogWindowsError(thing) (void)0 +#else +static void LogWindowsError(const char* file, int line, const char* thing) { +#define LogWindowsError(thing) LogWindowsError(__FILE__, __LINE__, thing) + char s[256]; + int e = GetLastError(); + WinStrerror(e, s, sizeof(s)); + fprintf(stderr, "%s:%d: error[%#x]: %s failed: %s\n", file, line, e, thing, s); +} +#endif + +static void *Recalloc(void *ptr, uint64_t newSize) { + HANDLE heap = GetProcessHeap(); + if (!ptr) { + return HeapAlloc(heap, HEAP_ZERO_MEMORY, newSize); + } + if (!newSize) { + HeapFree(heap, 0, ptr); + return 0; + } + return HeapReAlloc(heap, HEAP_ZERO_MEMORY, ptr, newSize); +} + +uint64_t WinSeek(int fd, uint64_t offset, int whence) { + HANDLE hFile; + DWORD winwhence; + LARGE_INTEGER distanceToMove; + LARGE_INTEGER newFilePointer; + distanceToMove.QuadPart = offset; + switch (whence) { + case SEEK_SET: + winwhence = FILE_BEGIN; + break; + case SEEK_CUR: + winwhence = FILE_CURRENT; + break; + case SEEK_END: + winwhence = FILE_END; + break; + default: + LogError("bad lseek() whence"); + errno = EINVAL; + return -1; + } + hFile = (HANDLE)_get_osfhandle(fd); + if (hFile == INVALID_HANDLE_VALUE) { + LogWindowsError("_get_osfhandle"); + errno = EBADF; + return -1; + } + if (GetFileType(hFile) != FILE_TYPE_DISK) { + LogError("bad file type for lseek()"); + errno = ESPIPE; + return -1; + } + if (!SetFilePointerEx(hFile, distanceToMove, &newFilePointer, winwhence)) { + LogWindowsError("SetFilePointerEx"); + errno = EPERM; + return -1; + } + return newFilePointer.QuadPart; +} + +int WinFtruncate(int fd, uint64_t length) { + HANDLE hFile; + LARGE_INTEGER old, neu; + hFile = (HANDLE)_get_osfhandle(fd); + if (hFile == INVALID_HANDLE_VALUE) { + LogWindowsError("_get_osfhandle"); + errno = EBADF; + return -1; + } + // save current file position + old.QuadPart = 0; + neu.QuadPart = 0; + if (!SetFilePointerEx(hFile, neu, &old, FILE_CURRENT)) { + LogWindowsError("SetFilePointerEx#1"); + return -1; + } + // set current position to new file size + neu.QuadPart = length; + if (!SetFilePointerEx(hFile, neu, NULL, FILE_BEGIN)) { + LogWindowsError("SetFilePointerEx#2"); + return -1; + } + // change the file size + if (!SetEndOfFile(hFile)) { + LogWindowsError("SetEndOfFile"); + SetFilePointerEx(hFile, old, NULL, FILE_BEGIN); + return -1; + } + // restore the original file position + // win32 allows this to exceed the end of file + if (!SetFilePointerEx(hFile, old, NULL, FILE_BEGIN)) { + LogWindowsError("SetFilePointerEx>3"); + return -1; + } + return 0; +} + +int WinMadvise(void *addr, uintptr_t length, int advice) { + switch (advice) { + case MADV_NORMAL: + case MADV_DONTNEED: + case MADV_SEQUENTIAL: + return 0; + case MADV_RANDOM: + case MADV_WILLNEED: { + HANDLE proc; + WIN32_MEMORY_RANGE_ENTRY entry; + proc = GetCurrentProcess(); + entry.VirtualAddress = addr; + entry.NumberOfBytes = length; + if (!PrefetchVirtualMemory(proc, 1, &entry, 0)) { + LogWindowsError("PrefetchVirtualMemory"); + errno = ENOMEM; + return -1; + } + return 0; + } + default: + errno = EINVAL; + return -1; + } +} + +int WinUnmap(void *addr, uintptr_t length) { + void *view; + HANDLE hand; + HANDLE fand; + int i, err = 0; + uintptr_t a, b; + uintptr_t x, y; + // compute the requested interval + // 1. length can't be zero + // 2. length is rounded up to the page size + // 3. addr must be aligned to page boundary + a = (uintptr_t)addr; + b = a + Roundup(length, 65536); + if (!length) { + LogError("tried to munmap zero bytes"); + errno = EINVAL; + return -1; + } + if (a & 65535) { + LogError("tried to munmap an address that's not 64kb aligned"); + errno = EINVAL; + return -1; + } + // 1. we permit unmapping multiple maps in one call + // 2. we don't care if the matched mappings aren't contiguous + // 3. it's an error if a matched mapping only partially overlaps + // 4. similar to close() we release all resources possible on error + Lock(); + for (i = 0; i < g_winmaps.n; ++i) { + if (!g_winmaps.p[i].hand) { + // this array slot is empty + continue; + } + // compute overlap between known mapping and requested interval + x = Max(a, g_winmaps.p[i].addr); + y = Min(b, g_winmaps.p[i].addr + g_winmaps.p[i].length); + if (x >= y) { + // there isn't any overlap + continue; + } + if (y - x != g_winmaps.p[i].length) { + // requested interval partially overlapped this mapping + // therefore we can't unmap it and must report an error + LogError("tried to partially unmap a mapping"); + err = ENOMEM; + continue; + } + // save the information we care about + view = (void *)g_winmaps.p[i].addr; + hand = g_winmaps.p[i].hand; + fand = g_winmaps.p[i].fand; + // delete this mapping from the global array + g_winmaps.p[i].hand = 0; + // perform the systems operations + // safe to release lock since g_winmaps.n is monotonic + Unlock(); + if (!UnmapViewOfFile(view)) { + LogWindowsError("UnmapViewOfFile"); + } + if (!CloseHandle(hand)) { + LogWindowsError("CloseHandle#1"); + } + if (fand != INVALID_HANDLE_VALUE) { + if (!CloseHandle(fand)) { + LogWindowsError("CloseHandle#2"); + } + } + Lock(); + } + Unlock(); + if (err) { + errno = err; + return -1; + } + return 0; +} + +void* WinMap(void *addr, uintptr_t length, int prot, int flags, int fd, uint64_t offset) { + int i; + LPVOID res; + HANDLE hand; + HANDLE hFile; + DWORD access; + DWORD wiprot; + uintptr_t fsize; + if (!length) { + LogError("mmap(length) was zero"); + errno = EINVAL; + return MAP_FAILED; + } + length = Roundup(length, 65536); + if ((uintptr_t)addr & 65535) { + if (~flags & MAP_FIXED) { + addr = 0; + } else { + LogError("MAP_FIXED used with address that's not 64kb aligned"); + errno = EINVAL; + return MAP_FAILED; + } + } + // these are the logical flag equivalents for creating mappings. please + // note that any subsequent virtualprotect calls must be a subset of the + // permissions we're using here. that's not a supported use case for us + if (flags & MAP_PRIVATE) { + // private mapping + if (prot & PROT_EXEC) { + if (prot & PROT_WRITE) { + if (flags & MAP_ANONYMOUS) { + wiprot = PAGE_EXECUTE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE; + } else { + wiprot = PAGE_EXECUTE_WRITECOPY; + access = FILE_MAP_COPY | FILE_MAP_EXECUTE; + } + } else { + wiprot = PAGE_EXECUTE_READ; + access = FILE_MAP_READ | FILE_MAP_EXECUTE; + } + } else if (prot & PROT_WRITE) { + if (flags & MAP_ANONYMOUS) { + wiprot = PAGE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE; + } else { + wiprot = PAGE_WRITECOPY; + access = FILE_MAP_COPY; + } + } else { + wiprot = PAGE_READONLY; + access = FILE_MAP_READ; + } + } else { + // shared mapping + if (prot & PROT_EXEC) { + if (prot & PROT_WRITE) { + wiprot = PAGE_EXECUTE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE; + } else { + wiprot = PAGE_EXECUTE_READ; + access = FILE_MAP_READ | FILE_MAP_EXECUTE; + } + } else if (prot & PROT_WRITE) { + wiprot = PAGE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE; + } else { + wiprot = PAGE_READONLY; + access = FILE_MAP_READ; + } + } + if (flags & MAP_ANONYMOUS) { + hFile = INVALID_HANDLE_VALUE; + fsize = length; + offset = 0; + } else { + fsize = 0; + hFile = (HANDLE)_get_osfhandle(fd); + if (hFile == INVALID_HANDLE_VALUE) { + LogWindowsError("_get_osfhandle"); + errno = EBADF; + return MAP_FAILED; + } + if (!DuplicateHandle(GetCurrentProcess(), hFile, + GetCurrentProcess(), &hFile, + 0, FALSE, DUPLICATE_SAME_ACCESS)) { + LogWindowsError("DuplicateHandle"); + errno = EBADF; + return MAP_FAILED; + } + } + if (flags & MAP_FIXED) { + if (!addr) { + // zero chance of microsoft letting us map the null page + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + errno = EINVAL; + return MAP_FAILED; + } else { + // blow away any existing mappings on requested interval + if (WinUnmap(addr, length) == -1) { + // can only happen if we partially overlap an existing mapping + assert(errno == ENOMEM); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + return MAP_FAILED; + } + } + } + hand = CreateFileMapping(hFile, 0, wiprot, + (DWORD)(fsize >> 32), + (DWORD)fsize, + 0); + if (!hand) { + LogWindowsError("CreateFileMapping"); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + errno = EPERM; + return MAP_FAILED; + } + res = MapViewOfFileEx(hand, access, + (DWORD)(offset >> 32), + (DWORD)offset, + length, addr); + if (!res) { + LogWindowsError("MapViewOfFileEx"); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + CloseHandle(hand); + errno = EPERM; + return MAP_FAILED; + } + if (flags & MAP_FIXED) { + // this assertion could legitimately fail if two threads engage in a + // race to create a MAP_FIXED mapping at the same address and that's + // certainly not the kind of use case we're designed to support here + assert(res == addr); + } + // record our new mapping in the global array + Lock(); + for (i = 0; i < g_winmaps.n; ++i) { + if (!g_winmaps.p[i].hand) { + // we found an empty slot + break; + } + } + if (i == g_winmaps.n) { + // we need to grow the array + // it's important to use kernel32 memory + // our malloc implementation depends on this + int n2; + struct WinMap *p2; + p2 = g_winmaps.p; + n2 = g_winmaps.n; + if (n2) { + n2 += n2 >> 1; + } else { + n2 = 7; + } + if ((p2 = (struct WinMap*)Recalloc(p2, n2 * sizeof(*p2)))) { + g_winmaps.p = p2; + g_winmaps.n = n2; + } else { + Unlock(); + LogError("recalloc failed"); + UnmapViewOfFile(res); + CloseHandle(hand); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + errno = ENOMEM; + return MAP_FAILED; + } + } + g_winmaps.p[i].hand = hand; + g_winmaps.p[i].fand = hFile; + g_winmaps.p[i].addr = (uintptr_t)res; + g_winmaps.p[i].length = length; + Unlock(); + return res; +} + +int WinMsync(void *addr, uintptr_t length, int flags) { + int i, err; + HANDLE hand; + uintptr_t x, y; + if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) { + LogError("bad msync flags"); + errno = EINVAL; + return -1; + } + // 1. we do nothing if length is zero (unlike win32 api) + // 2. the requested interval may envelop multiple known mappings + // 3. we don't care if those mappings aren't contiguous or a hole exists + // 4. the requested interval may specify a subrange of any given mapping + Lock(); + for (err = i = 0; i < g_winmaps.n; ++i) { + if (!g_winmaps.p[i].hand) { + // this array slot is empty + continue; + } + // compute overlap between known mapping and requested interval + x = Max((uintptr_t)addr, g_winmaps.p[i].addr); + y = Min((uintptr_t)addr + length, g_winmaps.p[i].addr + g_winmaps.p[i].length); + if (x >= y) { + // there isn't any overlap + continue; + } + // it's safe to release lock temporarily, since g_winmaps.n is monotonic + // any race conditions in handle being deleted should be caught by win32 + hand = g_winmaps.p[i].fand; + Unlock(); + // ensure coherency and that filesystem flush *will* happen + if (!FlushViewOfFile((void*)x, y - x)) { + LogWindowsError("FlushViewOfFile"); + err = EPERM; + } + if (flags & MS_SYNC) { + // ensure that filesystem flush *has* happened + if (!FlushFileBuffers(hand)) { + LogWindowsError("FlushFileBuffers"); + err = EPERM; + } + } + Lock(); + } + Unlock(); + if (err) { + errno = err; + return -1; + } + return 0; +} + +#else // NEED_*_MAP + +// this is a normal unix platform +// add some content to this object so the apple linker doesn't whine +int justine_mmap_module; + +#endif // NEED_*_MMAP diff --git a/mmap.h b/mmap.h index 8515530..fd9a259 100644 --- a/mmap.h +++ b/mmap.h @@ -1,32 +1,17 @@ #pragma once -// portable mmap() implementation -// -// - supports win32 (needs vista+) -// - supports posix.1 (no map_anonymous) -// -// notes on windows -// -// - no errno support -// - not designed to support mprotect() -// - very poor support for memory intervals - -#include #include #include -#include -#include +#include -#if defined(_MSC_VER) || defined(__MINGW32__) -#ifndef __MINGW32__ -#include -#include -#else -#include +#ifdef __cplusplus +extern "C" { #endif + +#ifdef _MSC_VER +#define NEED_WIN32_MMAP +#include #include -#include -#include #ifndef PROT_READ #define PROT_READ 1 @@ -89,6 +74,26 @@ #define MADV_WILLNEED 3 #endif +#ifndef MS_ASYNC +#define MS_ASYNC 1 +#endif +#ifndef MS_INVALIDATE +#define MS_INVALIDATE 2 +#endif +#ifndef MS_SYNC +#define MS_SYNC 4 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif +#ifndef SEEK_END +#define SEEK_END 2 +#endif + #ifndef mmap #define mmap WinMap #endif @@ -101,8 +106,11 @@ #ifndef close #define close _close #endif -#ifndef fstat -#define fstat _fstati64 +#ifndef lseek +#define lseek WinSeek +#endif +#ifndef msync +#define msync WinMsync #endif #ifndef madvise #define madvise WinMadvise @@ -111,219 +119,27 @@ #define ftruncate WinFtruncate #endif -static std::atomic g_winlock; -static std::map g_winmap; - - -static void WinLock(void) { - while (!g_winlock.exchange(1, std::memory_order_acquire)); -} - -static void WunLock(void) { - g_winlock.store(0, std::memory_order_release); -} - -static int WinMadvise(char* fd, size_t length, int flags) { - auto p_handle = GetCurrentProcess(); - struct _WIN32_MEMORY_RANGE_ENTRY entry((void*)fd, length); - bool success = PrefetchVirtualMemory(p_handle, 1, &entry, 0); - if (!success) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("%s failed with error %d: %s"), - error_code, lpMsgBuf); - } - return 0; -} - -static int WinFtruncate(int fd, uint64_t length) { - return _chsize_s(fd, length) ? -1 : 0; -} +uint64_t WinSeek(int, uint64_t, int); +int WinMsync(void *, uintptr_t, int); +int WinMadvise(void *, uintptr_t, int); +int WinFtruncate(int, uint64_t); +int WinUnmap(void *, uintptr_t); +void *WinMap(void *, uintptr_t, int, int, int, uint64_t); -static int WinUnmap(void *addr, size_t length) { - HANDLE hand; - WinLock(); - hand = g_winmap[addr]; - g_winmap[addr] = 0; - WunLock(); - if (hand) { - UnmapViewOfFile(addr); - CloseHandle(hand); - return 0; - } else { - return -1; - } -} - -static void *WinMap(void *addr, size_t length, int prot, - int flags, int fd, uint64_t offset) { - HANDLE hFile; - DWORD winprot; - DWORD access = 0; - HANDLE hand = NULL; - LPVOID res = NULL; - if (prot & PROT_READ) { - access |= FILE_MAP_READ; - } - if (prot & PROT_WRITE) { - access |= FILE_MAP_WRITE; - } - if (prot & PROT_EXEC) { - access |= FILE_MAP_EXECUTE; - } - if (flags & MAP_PRIVATE) { - // private mapping - if (prot & PROT_EXEC) { - if (prot & PROT_WRITE) { - if (flags & MAP_ANONYMOUS) { - winprot = PAGE_EXECUTE_READWRITE; - } else { - winprot = PAGE_EXECUTE_WRITECOPY; - } - } else { - winprot = PAGE_EXECUTE_READ; - } - } else if (prot & PROT_WRITE) { - if (flags & MAP_ANONYMOUS) { - winprot = PAGE_READWRITE; - } else { - winprot = PAGE_WRITECOPY; - } - } else { - winprot = PAGE_READONLY; - } - } else { - // shared mapping - if (prot & PROT_EXEC) { - if (prot & PROT_WRITE) { - winprot = PAGE_EXECUTE_READWRITE; - } else { - winprot = PAGE_EXECUTE_READ; - } - } else if (prot & PROT_WRITE) { - winprot = PAGE_READWRITE; - } else { - winprot = PAGE_READONLY; - } - } - if (flags & MAP_ANONYMOUS) { - hFile = INVALID_HANDLE_VALUE; - offset = 0; - } else { - hFile = (HANDLE)_get_osfhandle(fd); - if (hFile == INVALID_HANDLE_VALUE) { - return MAP_FAILED; - } - } - if (flags & MAP_FIXED) { - if (!addr) { - return MAP_FAILED; - } else { - WinUnmap(addr, length); - } - } - hand = CreateFileMapping(hFile, 0, winprot, - (offset + length) >> 32, - (offset + length), 0); - if (!hand) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("%s failed with error %d: %s"), - error_code, lpMsgBuf); - return MAP_FAILED; - } - if (winprot == PAGE_WRITECOPY) { - access = FILE_MAP_COPY; - } +#else // _MSC_VER - res = MapViewOfFileEx(hand, access, offset >> 32, - offset, length, addr); - if (!res) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("failed with error %d: %s"), - error_code, lpMsgBuf); - fprintf(stderr, (char*)lpDisplayBuf); - CloseHandle(hand); - return MAP_FAILED; - } - WinLock(); - g_winmap[res] = hand; - WunLock(); - return res; -} - -#else #include #include + #ifndef MAP_ANONYMOUS +#define NEED_POSIX_MMAP +#define mmap PosixMmap #define MAP_ANONYMOUS 0x10000000 +void *PosixMmap(void*, size_t, int, int, int, off_t); +#endif // MAP_ANONYMOUS -static void *PosixMmap(void *addr, size_t length, int prot, - int flags, int fd, uint64_t offset) { - int tfd; - void *res; - char path[] = "/tmp/llama.dat.XXXXXX"; - if (~flags & MAP_ANONYMOUS) { - res = mmap(addr, length, prot, flags, fd, offset); - } else if ((tfd = mkstemp(path)) != -1) { - unlink(path); - if (!ftruncate(tfd, length)) { - res = mmap(addr, length, prot, flags & ~MAP_ANONYMOUS, tfd, 0); - } else { - res = MAP_FAILED; - } - close(tfd); - } else { - res = MAP_FAILED; - } - return res; -} +#endif // _MSC_VER -#ifndef mmap -#define mmap PosixMmap +#ifdef __cplusplus +} #endif -#endif // MAP_ANONYMOUS -#endif // _MSC_VER