Make WIN32 mmap() improvements (#341)

Still not fully working yet.

Closes #341
pull/564/head^2
oKatanaaa 1 year ago committed by Justine Tunney
parent 0b5448a3a4
commit e4881686b4
No known key found for this signature in database
GPG Key ID: BE714B4575D6E328

409
.gitignore vendored

@ -22,3 +22,412 @@ models/*
arm_neon.h
compile_commands.json
CMakeFiles/
CMakeCache.txt
# Visual Studio stuff
*.exe
*.sln
*.vcxproj
*.vcxproj.filters
cmake_install.cmake
*.dir/
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp
# Visual Studio 6 technical files
*.ncb
*.aps
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml

@ -2,7 +2,7 @@
#include "ggml.h"
#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
//#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__)
#include <alloca.h>
#endif
@ -2437,7 +2437,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
*ctx = (struct ggml_context) {
/*.mem_size =*/ params.mem_size,
/*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
/*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : _malloc(params.mem_size),
/*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
/*.n_objects =*/ 0,
/*.objects_begin =*/ NULL,
@ -2469,7 +2469,7 @@ void ggml_free(struct ggml_context * ctx) {
__func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
if (ctx->mem_buffer_owned) {
free(ctx->mem_buffer);
_free(ctx->mem_buffer);
}
found = true;

@ -183,6 +183,9 @@ extern "C" {
#define GGML_MAX_CONTEXTS 64
#define GGML_MAX_OPT 4
void* _malloc(size_t n);
void _free(void* p);
#ifdef __ARM_NEON
// we use the built-in 16-bit float type
typedef __fp16 ggml_fp16_t;

@ -1,3 +1,7 @@
#if defined(_MSC_VER) || defined(__MINGW32__)
#define NOMINMAX
#endif
#include "ggml.h"
#include "utils.h"
@ -19,6 +23,10 @@
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#else
#include <errno.h>
#define msync(addr, len_bytes, flag) winMSync
#define MS_ASYNC 0
#endif
#define ROUNDUP(X, K) (((X) + (K)-1) & -(K))
@ -96,6 +104,7 @@ struct llama_model {
std::map<std::string, struct ggml_tensor *> tensors;
};
struct magic {
uint32_t magic;
std::atomic<unsigned> lock;
@ -103,10 +112,37 @@ struct magic {
size_t commit;
size_t offset;
size_t capacity;
gpt_vocab *vocab;
llama_model *model;
gpt_vocab* vocab;
llama_model* model;
};
static void winMSync(magic* addr, size_t len_bytes) {
bool success = FlushViewOfFile((void*)addr, len_bytes);
if (!success) {
LPVOID lpMsgBuf;
LPVOID lpDisplayBuf;
DWORD error_code = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
error_code,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR)&lpMsgBuf,
0, NULL);
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
(lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR));
StringCchPrintf((LPTSTR)lpDisplayBuf,
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
TEXT("failed with error %d: %s"),
error_code, lpMsgBuf);
}
HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd);
FlushFileBuffers(hFile);
}
static struct magic *mag;
static inline void spin_lock(std::atomic<unsigned> &lock) {
@ -129,17 +165,26 @@ static void magic_commit(void) {
mag->offset = mag->capacity;
mag->commit = mag->capacity;
mag->magic = 0xFEEDABEE;
msync(mag, mag->commit, MS_ASYNC);
bool success = msync(mag, mag->commit, MS_ASYNC);
}
static void magic_init(void) {
int fd;
size_t n;
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
struct stat st;
#else
struct _stat64 st;
#endif
if (mag) return;
n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN);
if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) {
fstat(fd, &st);
int result = fstat(fd, &st);
int error = errno;
if (errno == EBADF)
fprintf(stderr, "Bad file descriptor.\n");
else if (errno == EINVAL)
fprintf(stderr, "Invalid argument to _fstat.\n");
if (st.st_size >= n) {
mag = (struct magic *)Mmap(MAGIC_ADDR, n,
PROT_READ | PROT_WRITE,
@ -182,9 +227,9 @@ void *memalign(size_t a, size_t n) {
i = i + sizeof(size_t);
i = ROUNDUP(i, a);
j = ROUNDUP(i + m, MAGIC_GRAN);
if (j > mag->capacity) {
//if (j > mag->capacity) {
if (!mag->magic) {
ftruncate(mag->fd, j);
int result = ftruncate(mag->fd, j);
p = mmap(MAGIC_ADDR + mag->capacity,
j - mag->capacity, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity);
@ -199,7 +244,7 @@ void *memalign(size_t a, size_t n) {
spin_unlock(mag->lock);
return 0;
}
}
//}
mag->offset = i + m;
spin_unlock(mag->lock);
p = MAGIC_ADDR + i;
@ -207,7 +252,7 @@ void *memalign(size_t a, size_t n) {
return p;
}
void *malloc(size_t n) {
void *_malloc(size_t n) {
return memalign(MAGIC_ALGN, n);
}
@ -215,33 +260,53 @@ size_t malloc_usable_size(const void *p) {
return ((const size_t *)p)[-1];
}
void *calloc(size_t n, size_t z) {
void *_calloc(size_t n, size_t z) {
void *p;
if ((p = malloc((n *= z)))) {
if ((p = _malloc((n *= z)))) {
memset(p, 0, n);
}
return p;
}
void free(void *p) {
void _free(void *p) {
// do nothing
}
void *realloc(void *p, size_t n) {
void *_realloc(void *p, size_t n) {
void *q;
if (!p) {
return malloc(n);
return _malloc(n);
}
if (!n) {
free(p);
_free(p);
return 0;
}
if ((q = malloc(n))) {
if ((q = _malloc(n))) {
memcpy(q, p, ((const size_t *)p)[-1]);
}
return q;
}
#if defined(malloc)
# undef malloc
#endif
#define malloc(x) _malloc(x)
#if defined(calloc)
# undef calloc
#endif
#define calloc(x) _calloc(x)
#if defined(realloc)
# undef realloc
#endif
#define realloc(x) _realloc(x)
#if defined(free)
# undef free
#endif
#define free(x) _free(x)
// load the model's weights from a file
bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
@ -707,7 +772,7 @@ bool llama_eval(
const int d_key = n_embd/n_head;
static size_t buf_size = 512u*1024*1024;
static void * buf = malloc(buf_size);
static void * buf = _malloc(buf_size);
if (mem_per_token > 0 && mem_per_token*N > buf_size) {
const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
@ -715,7 +780,7 @@ bool llama_eval(
// reallocate
buf_size = buf_size_new;
buf = realloc(buf, buf_size);
buf = _realloc(buf, buf_size);
if (buf == nullptr) {
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
return false;

@ -20,6 +20,7 @@
#if defined(_MSC_VER) || defined(__MINGW32__)
#ifndef __MINGW32__
#include <Windows.h>
#include <strsafe.h>
#else
#include <windows.h>
#endif
@ -101,7 +102,7 @@
#define close _close
#endif
#ifndef fstat
#define fstat _fstat
#define fstat _fstati64
#endif
#ifndef madvise
#define madvise WinMadvise
@ -113,6 +114,7 @@
static std::atomic<unsigned> g_winlock;
static std::map<LPVOID, HANDLE> g_winmap;
static void WinLock(void) {
while (!g_winlock.exchange(1, std::memory_order_acquire));
}
@ -121,7 +123,30 @@ static void WunLock(void) {
g_winlock.store(0, std::memory_order_release);
}
static int WinMadvise(int fd, size_t length, int flags) {
static int WinMadvise(char* fd, size_t length, int flags) {
auto p_handle = GetCurrentProcess();
struct _WIN32_MEMORY_RANGE_ENTRY entry((void*)fd, length);
bool success = PrefetchVirtualMemory(p_handle, 1, &entry, 0);
if (!success) {
LPVOID lpMsgBuf;
LPVOID lpDisplayBuf;
DWORD error_code = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
error_code,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR)&lpMsgBuf,
0, NULL);
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
(lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR));
StringCchPrintf((LPTSTR)lpDisplayBuf,
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
TEXT("%s failed with error %d: %s"),
error_code, lpMsgBuf);
}
return 0;
}
@ -215,11 +240,52 @@ static void *WinMap(void *addr, size_t length, int prot,
(offset + length) >> 32,
(offset + length), 0);
if (!hand) {
LPVOID lpMsgBuf;
LPVOID lpDisplayBuf;
DWORD error_code = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
error_code,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR)&lpMsgBuf,
0, NULL);
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
(lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR));
StringCchPrintf((LPTSTR)lpDisplayBuf,
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
TEXT("%s failed with error %d: %s"),
error_code, lpMsgBuf);
return MAP_FAILED;
}
if (winprot == PAGE_WRITECOPY) {
access = FILE_MAP_COPY;
}
res = MapViewOfFileEx(hand, access, offset >> 32,
offset, length, addr);
if (!res) {
LPVOID lpMsgBuf;
LPVOID lpDisplayBuf;
DWORD error_code = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
error_code,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR)&lpMsgBuf,
0, NULL);
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
(lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR));
StringCchPrintf((LPTSTR)lpDisplayBuf,
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
TEXT("failed with error %d: %s"),
error_code, lpMsgBuf);
fprintf(stderr, (char*)lpDisplayBuf);
CloseHandle(hand);
return MAP_FAILED;
}

Loading…
Cancel
Save