Skip to content

Commit d87bf08

Browse files
authored
feat: Update llama.cpp to ggml-org/llama.cpp@f53577432 (#2189)
* feat: Update llama.cpp to ggml-org/llama.cpp@f53577432
* docs: Update changelog for llama.cpp f53577432
* docs: Keep one unreleased llama.cpp changelog entry
1 parent 1b1a320 commit d87bf08

4 files changed

Lines changed: 65 additions & 67 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10-
- feat: Update llama.cpp to ggerganov/llama.cpp@3bd9aa1f9 and sync Python bindings
10+
- feat: Update llama.cpp to ggerganov/llama.cpp@f53577432 and sync Python bindings
1111

1212
## [0.3.20]
1313

llama_cpp/llama_cpp.py

Lines changed: 0 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1516,54 +1516,6 @@ def llama_free(ctx: llama_context_p, /):
15161516
...
15171517

15181518

1519-
# enum llama_params_fit_status {
1520-
# LLAMA_PARAMS_FIT_STATUS_SUCCESS = 0,
1521-
# LLAMA_PARAMS_FIT_STATUS_FAILURE = 1,
1522-
# LLAMA_PARAMS_FIT_STATUS_ERROR = 2,
1523-
# };
1524-
LLAMA_PARAMS_FIT_STATUS_SUCCESS = 0
1525-
LLAMA_PARAMS_FIT_STATUS_FAILURE = 1
1526-
LLAMA_PARAMS_FIT_STATUS_ERROR = 2
1527-
1528-
1529-
# LLAMA_API enum llama_params_fit_status llama_params_fit(
1530-
# const char * path_model,
1531-
# struct llama_model_params * mparams,
1532-
# struct llama_context_params * cparams,
1533-
# float * tensor_split,
1534-
# struct llama_model_tensor_buft_override * tensor_buft_overrides,
1535-
# size_t * margins,
1536-
# uint32_t n_ctx_min,
1537-
# enum ggml_log_level log_level);
1538-
@ctypes_function(
1539-
"llama_params_fit",
1540-
[
1541-
ctypes.c_char_p,
1542-
ctypes.POINTER(llama_model_params),
1543-
ctypes.POINTER(llama_context_params),
1544-
ctypes.POINTER(ctypes.c_float),
1545-
ctypes.c_void_p,
1546-
ctypes.POINTER(ctypes.c_size_t),
1547-
ctypes.c_uint32,
1548-
ctypes.c_int,
1549-
],
1550-
ctypes.c_int,
1551-
)
1552-
def llama_params_fit(
1553-
path_model: bytes,
1554-
mparams: CtypesPointerOrRef[llama_model_params],
1555-
cparams: CtypesPointerOrRef[llama_context_params],
1556-
tensor_split: Optional[CtypesPointer[ctypes.c_float]],
1557-
tensor_buft_overrides: ctypes.c_void_p,
1558-
margins: Optional[CtypesPointer[ctypes.c_size_t]],
1559-
n_ctx_min: int,
1560-
log_level: int,
1561-
/,
1562-
) -> int:
1563-
"""Fit model and context parameters for a model path."""
1564-
...
1565-
1566-
15671519
# LLAMA_API int64_t llama_time_us(void);
15681520
@ctypes_function(
15691521
"llama_time_us",
@@ -4869,13 +4821,6 @@ def llama_perf_sampler_print(chain: llama_sampler_p, /): ...
48694821
def llama_perf_sampler_reset(chain: llama_sampler_p, /): ...
48704822

48714823

4872-
# // print a breakdown of per-device memory use via LLAMA_LOG:
4873-
@ctypes_function("llama_memory_breakdown_print", [llama_context_p_ctypes], None)
4874-
def llama_memory_breakdown_print(ctx: llama_context_p, /):
4875-
"""Print a breakdown of per-device memory use."""
4876-
...
4877-
4878-
48794824
# //
48804825
# // training
48814826
# //

llama_cpp/mtmd_cpp.py

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
c_int,
99
c_uint8,
1010
c_uint32,
11+
c_size_t,
1112
c_float,
1213
c_void_p,
13-
c_size_t,
1414
POINTER,
1515
_Pointer, # type: ignore
1616
Structure,
@@ -123,6 +123,17 @@ class mtmd_input_text(Structure):
123123
]
124124

125125

126+
class mtmd_decoder_pos(Structure):
127+
"""Decoder attention position for M-RoPE models."""
128+
129+
_fields_ = [
130+
("t", c_uint32),
131+
("x", c_uint32),
132+
("y", c_uint32),
133+
("z", c_uint32),
134+
]
135+
136+
126137
################################################
127138
# mtmd.h functions
128139
################################################
@@ -165,35 +176,41 @@ def mtmd_init_from_file(
165176
def mtmd_free(ctx: mtmd_context_p, /): ...
166177

167178

168-
# MTMD_API bool mtmd_decode_use_non_causal(mtmd_context * ctx);
169-
@ctypes_function("mtmd_decode_use_non_causal", [mtmd_context_p_ctypes], c_bool)
170-
def mtmd_decode_use_non_causal(ctx: mtmd_context_p, /) -> bool:
179+
# MTMD_API bool mtmd_decode_use_non_causal(const mtmd_context * ctx, const mtmd_input_chunk * chunk);
180+
@ctypes_function(
181+
"mtmd_decode_use_non_causal",
182+
[mtmd_context_p_ctypes, mtmd_input_chunk_p_ctypes],
183+
c_bool,
184+
)
185+
def mtmd_decode_use_non_causal(
186+
ctx: mtmd_context_p, chunk: Optional[mtmd_input_chunk_p], /
187+
) -> bool:
171188
"""Check whether MTMD decoding uses non-causal attention."""
172189
...
173190

174191

175-
# MTMD_API bool mtmd_decode_use_mrope(mtmd_context * ctx);
192+
# MTMD_API bool mtmd_decode_use_mrope(const mtmd_context * ctx);
176193
@ctypes_function("mtmd_decode_use_mrope", [mtmd_context_p_ctypes], c_bool)
177194
def mtmd_decode_use_mrope(ctx: mtmd_context_p, /) -> bool:
178195
"""Check whether MTMD decoding uses mRoPE."""
179196
...
180197

181198

182-
# MTMD_API bool mtmd_support_vision(mtmd_context * ctx);
199+
# MTMD_API bool mtmd_support_vision(const mtmd_context * ctx);
183200
@ctypes_function("mtmd_support_vision", [mtmd_context_p_ctypes], c_bool)
184201
def mtmd_support_vision(ctx: mtmd_context_p, /) -> bool:
185202
"""Check whether the current model supports vision input."""
186203
...
187204

188205

189-
# MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
206+
# MTMD_API bool mtmd_support_audio(const mtmd_context * ctx);
190207
@ctypes_function("mtmd_support_audio", [mtmd_context_p_ctypes], c_bool)
191208
def mtmd_support_audio(ctx: mtmd_context_p, /) -> bool:
192209
"""Check whether MTMD supports audio."""
193210
...
194211

195212

196-
# MTMD_API int mtmd_get_audio_sample_rate(mtmd_context * ctx);
213+
# MTMD_API int mtmd_get_audio_sample_rate(const mtmd_context * ctx);
197214
@ctypes_function("mtmd_get_audio_sample_rate", [mtmd_context_p_ctypes], c_int)
198215
def mtmd_get_audio_sample_rate(ctx: mtmd_context_p, /) -> int:
199216
"""Get the audio sample rate in Hz. Returns -1 if audio is not supported."""
@@ -418,14 +435,16 @@ def mtmd_image_tokens_get_n_tokens(image_tokens: mtmd_image_tokens_p, /) -> int:
418435
...
419436

420437

421-
# MTMD_API size_t mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens);
438+
# DEPRECATED(MTMD_API size_t mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens),
439+
# "use mtmd_image_tokens_get_decoder_pos() instead");
422440
@ctypes_function("mtmd_image_tokens_get_nx", [mtmd_image_tokens_p_ctypes], c_size_t)
423441
def mtmd_image_tokens_get_nx(image_tokens: mtmd_image_tokens_p, /) -> int:
424442
"""Get the image token grid width."""
425443
...
426444

427445

428-
# MTMD_API size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens);
446+
# DEPRECATED(MTMD_API size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens),
447+
# "use mtmd_image_tokens_get_decoder_pos() instead");
429448
@ctypes_function("mtmd_image_tokens_get_ny", [mtmd_image_tokens_p_ctypes], c_size_t)
430449
def mtmd_image_tokens_get_ny(image_tokens: mtmd_image_tokens_p, /) -> int:
431450
"""Get the image token grid height."""
@@ -450,6 +469,23 @@ def mtmd_image_tokens_get_n_pos(image_tokens: mtmd_image_tokens_p, /) -> int:
450469
...
451470

452471

472+
# MTMD_API struct mtmd_decoder_pos mtmd_image_tokens_get_decoder_pos(
473+
# const mtmd_image_tokens * image_tokens, llama_pos pos_0, size_t i);
474+
@ctypes_function(
475+
"mtmd_image_tokens_get_decoder_pos",
476+
[mtmd_image_tokens_p_ctypes, llama_cpp.llama_pos, c_size_t],
477+
mtmd_decoder_pos,
478+
)
479+
def mtmd_image_tokens_get_decoder_pos(
480+
image_tokens: mtmd_image_tokens_p,
481+
pos_0: llama_cpp.llama_pos,
482+
i: Union[c_size_t, int],
483+
/,
484+
) -> mtmd_decoder_pos:
485+
"""Get decoder attention position for an image embedding token."""
486+
...
487+
488+
453489
# MTMD_API int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens);
454490
@ctypes_function(
455491
"mtmd_encode",
@@ -534,6 +570,23 @@ def mtmd_helper_get_n_pos(chunks: mtmd_input_chunks_p, /) -> int:
534570
...
535571

536572

573+
# MTMD_API void mtmd_helper_image_get_decoder_pos(
574+
# const mtmd_image_tokens * image, llama_pos pos_0, struct mtmd_decoder_pos * out_pos);
575+
@ctypes_function(
576+
"mtmd_helper_image_get_decoder_pos",
577+
[mtmd_image_tokens_p_ctypes, llama_cpp.llama_pos, POINTER(mtmd_decoder_pos)],
578+
None,
579+
)
580+
def mtmd_helper_image_get_decoder_pos(
581+
image: mtmd_image_tokens_p,
582+
pos_0: llama_cpp.llama_pos,
583+
out_pos: "_Pointer[mtmd_decoder_pos]",
584+
/,
585+
):
586+
"""Fill decoder attention positions for all image embedding tokens."""
587+
...
588+
589+
537590
# MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
538591
# struct llama_context * lctx,
539592
# const mtmd_input_chunks * chunks,

vendor/llama.cpp

0 commit comments

Comments
 (0)