diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index 5ffd12b8b2..c2ae0db107 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -15,6 +15,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 template <typename T>
 struct type_to_gguf_type;
 
diff --git a/src/llama-adapter.cpp b/src/llama-adapter.cpp
index 8d94034aed..995c5fa41d 100644
--- a/src/llama-adapter.cpp
+++ b/src/llama-adapter.cpp
@@ -8,6 +8,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 // vec
 
 ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index 5d317f4ee6..6a7bed5305 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -6,6 +6,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 #if __cplusplus >= 202000L
 #define LU8(x) (const char*)(u8##x)
 #else
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 06e93b19cb..e932b11ac1 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -12,6 +12,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 //
 // llama_context
 //
diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index bed706bb24..04f4b484e5 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -8,6 +8,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 //
 // helpers
 //
diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
index 8517b722a9..c0e6d05c35 100644
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -12,6 +12,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 //
 // llama_kv_cache_unified
 //
diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp
index 1b1e95d567..b6624a3864 100644
--- a/src/llama-memory-recurrent.cpp
+++ b/src/llama-memory-recurrent.cpp
@@ -11,6 +11,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 //
 // llama_memory_recurrent
 //
diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
index 47497cf953..c5f8a28bb3 100644
--- a/src/llama-mmap.cpp
+++ b/src/llama-mmap.cpp
@@ -39,6 +39,8 @@
 #include
 #endif
 
+#include "moz-overrides.h"
+
 // TODO: consider moving to llama-impl.h if needed in more places
 #if defined(_WIN32)
 static std::string llama_format_win_err(DWORD err) {
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index bd9e6da883..1a90a1eb88 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -7,6 +7,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 static const size_t kiB = 1024;
 static const size_t MiB = 1024*kiB;
 static const size_t GiB = 1024*MiB;
diff --git a/src/llama-model-loader.h b/src/llama-model-loader.h
index 0f52b011b6..ded54f544a 100644
--- a/src/llama-model-loader.h
+++ b/src/llama-model-loader.h
@@ -34,12 +34,14 @@
     llama_tensor_weight(const llama_file * file, uint16_t idx, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
         const int tensor_idx = gguf_find_tensor(gguf_ctx, ggml_get_name(tensor));
         if (tensor_idx < 0) {
-            throw std::runtime_error(format("tensor '%s' not found in the model", ggml_get_name(tensor)));
+            // throw std::runtime_error(format("tensor '%s' not found in the model", ggml_get_name(tensor)));
+            std::abort();
         }
 
         offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
         if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size()) {
-            throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", ggml_get_name(tensor)));
+            //throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", ggml_get_name(tensor)));
+            std::abort();
         }
     }
 };
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index fc39195ed5..e041f7e1a6 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -25,6 +25,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 const char * llm_type_name(llm_type type) {
     switch (type) {
         case LLM_TYPE_14M: return "14M";
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index bfbf5fa230..dd1d9eee52 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -17,6 +17,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 // the ring buffer works similarly to std::deque, but with a fixed capacity
 template <typename T>
 struct ring_buffer {
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 5c9eb87566..334f862100 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -20,6 +20,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 //
 // helpers
 //
diff --git a/src/llama.cpp b/src/llama.cpp
index 34906cdb62..0adb16598e 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17,6 +17,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
diff --git a/src/unicode.cpp b/src/unicode.cpp
index 43a4581b96..6b3b2dbe7d 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -19,6 +19,8 @@
 #include
 #include
 
+#include "moz-overrides.h"
+
 size_t unicode_len_utf8(char src) {
     const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
     uint8_t highbits = static_cast<uint8_t>(src) >> 4;
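
A note on the pattern above: every vendored translation unit gains an #include "moz-overrides.h" near the top, and the two throw sites in llama-model-loader.h are commented out in favor of std::abort(), which is what a build with C++ exceptions disabled (e.g. -fno-exceptions) requires, since a throw expression will not compile there. The override header itself is not part of this diff; the sketch below is only a guess at the kind of shim such a header could provide, and the name moz_llama_fatal and its message format are assumptions, not the real header's contents.

// Hypothetical sketch of a "moz-overrides.h"-style shim; the real header is
// not shown in this patch, and moz_llama_fatal is an invented name.
#pragma once

#include <cstdio>
#include <cstdlib>

// With exceptions disabled, former throw sites need a [[noreturn]] fallback.
// This variant prints a diagnostic before terminating; the patch above takes
// the simpler route and calls std::abort() directly at each site.
[[noreturn]] inline void moz_llama_fatal(const char * msg) {
    std::fprintf(stderr, "llama.cpp fatal: %s\n", msg);
    std::abort();
}

With such a helper, the bounds-check site in llama-model-loader.h could call moz_llama_fatal("tensor data is not within the file bounds") and preserve the original error text instead of aborting silently.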