Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

- feat: Update llama.cpp to ggerganov/llama.cpp@3bd9aa1f9 and sync Python bindings
- feat: Update llama.cpp to ggerganov/llama.cpp@f53577432 and sync Python bindings

## [0.3.20]

Expand Down
55 changes: 0 additions & 55 deletions llama_cpp/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1516,54 +1516,6 @@ def llama_free(ctx: llama_context_p, /):
...


# enum llama_params_fit_status {
# LLAMA_PARAMS_FIT_STATUS_SUCCESS = 0,
# LLAMA_PARAMS_FIT_STATUS_FAILURE = 1,
# LLAMA_PARAMS_FIT_STATUS_ERROR = 2,
# };
# Python-side mirrors of the C enum above; these are the status codes
# returned by llama_params_fit().
LLAMA_PARAMS_FIT_STATUS_SUCCESS = 0  # parameters fit
LLAMA_PARAMS_FIT_STATUS_FAILURE = 1  # parameters could not be made to fit
LLAMA_PARAMS_FIT_STATUS_ERROR = 2  # an error occurred while fitting


# LLAMA_API enum llama_params_fit_status llama_params_fit(
#     const char * path_model,
#     struct llama_model_params * mparams,
#     struct llama_context_params * cparams,
#     float * tensor_split,
#     struct llama_model_tensor_buft_override * tensor_buft_overrides,
#     size_t * margins,
#     uint32_t n_ctx_min,
#     enum ggml_log_level log_level);
@ctypes_function(
    "llama_params_fit",
    [
        ctypes.c_char_p,
        ctypes.POINTER(llama_model_params),
        ctypes.POINTER(llama_context_params),
        ctypes.POINTER(ctypes.c_float),
        ctypes.c_void_p,
        ctypes.POINTER(ctypes.c_size_t),
        ctypes.c_uint32,
        ctypes.c_int,
    ],
    ctypes.c_int,
)
def llama_params_fit(
    path_model: bytes,
    mparams: CtypesPointerOrRef[llama_model_params],
    cparams: CtypesPointerOrRef[llama_context_params],
    tensor_split: Optional[CtypesPointer[ctypes.c_float]],
    tensor_buft_overrides: ctypes.c_void_p,
    margins: Optional[CtypesPointer[ctypes.c_size_t]],
    n_ctx_min: int,
    log_level: int,
    /,
) -> int:
    """Fit model and context parameters for a model path.

    Thin ctypes binding for the C function declared above; the C API takes
    mutable pointers to *mparams*/*cparams*, so the native side may adjust
    them in place (NOTE(review): exact fitting semantics are defined
    upstream in llama.h — confirm there).

    Args:
        path_model: UTF-8 encoded path to the model file (``bytes``).
        mparams: Pointer/byref to a llama_model_params to be fitted.
        cparams: Pointer/byref to a llama_context_params to be fitted.
        tensor_split: Optional float array pointer; NULL is accepted.
        tensor_buft_overrides: Opaque pointer
            (struct llama_model_tensor_buft_override * in C); may be NULL.
        margins: Optional size_t array pointer — presumably per-device
            memory margins; verify against llama.h.
        n_ctx_min: Minimum context size to preserve while fitting.
        log_level: ggml_log_level value controlling native logging.

    Returns:
        One of the LLAMA_PARAMS_FIT_STATUS_* codes (C enum
        llama_params_fit_status).
    """
    ...


# LLAMA_API int64_t llama_time_us(void);
@ctypes_function(
"llama_time_us",
Expand Down Expand Up @@ -4869,13 +4821,6 @@ def llama_perf_sampler_print(chain: llama_sampler_p, /): ...
def llama_perf_sampler_reset(chain: llama_sampler_p, /): ...


# // print a breakdown of per-device memory use via LLAMA_LOG:
@ctypes_function("llama_memory_breakdown_print", [llama_context_p_ctypes], None)
def llama_memory_breakdown_print(ctx: llama_context_p, /):
    """Print a breakdown of per-device memory use.

    Output is emitted through llama.cpp's LLAMA_LOG facility (per the C
    header comment above); nothing is returned to Python.
    """
    ...


# //
# // training
# //
Expand Down
73 changes: 63 additions & 10 deletions llama_cpp/mtmd_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
c_int,
c_uint8,
c_uint32,
c_size_t,
c_float,
c_void_p,
c_size_t,
POINTER,
_Pointer, # type: ignore
Structure,
Expand Down Expand Up @@ -123,6 +123,17 @@ class mtmd_input_text(Structure):
]


class mtmd_decoder_pos(Structure):
    """Decoder attention position for M-RoPE models.

    ctypes mirror of ``struct mtmd_decoder_pos`` in mtmd.h; the field
    order and types must match the C struct layout exactly, so do not
    reorder or retype them.
    """

    _fields_ = [
        # Four M-RoPE position components (t, x, y, z), each uint32.
        # NOTE(review): exact axis semantics are defined by mtmd.h — confirm there.
        ("t", c_uint32),
        ("x", c_uint32),
        ("y", c_uint32),
        ("z", c_uint32),
    ]


################################################
# mtmd.h functions
################################################
Expand Down Expand Up @@ -165,35 +176,41 @@ def mtmd_init_from_file(
def mtmd_free(ctx: mtmd_context_p, /): ...


# MTMD_API bool mtmd_decode_use_non_causal(mtmd_context * ctx);
@ctypes_function("mtmd_decode_use_non_causal", [mtmd_context_p_ctypes], c_bool)
def mtmd_decode_use_non_causal(ctx: mtmd_context_p, /) -> bool:
# MTMD_API bool mtmd_decode_use_non_causal(const mtmd_context * ctx, const mtmd_input_chunk * chunk);
@ctypes_function(
    "mtmd_decode_use_non_causal",
    [mtmd_context_p_ctypes, mtmd_input_chunk_p_ctypes],
    c_bool,
)
def mtmd_decode_use_non_causal(
    ctx: mtmd_context_p, chunk: Optional[mtmd_input_chunk_p], /
) -> bool:
    """Check whether MTMD decoding uses non-causal attention.

    Args:
        ctx: MTMD context handle.
        chunk: Input chunk to query; annotated Optional, so NULL is passed
            through to C — NOTE(review): confirm NULL-chunk semantics
            against mtmd.h.

    Returns:
        True when non-causal attention should be used for decoding.
    """
    ...


# MTMD_API bool mtmd_decode_use_mrope(const mtmd_context * ctx);
@ctypes_function(
    "mtmd_decode_use_mrope",
    [mtmd_context_p_ctypes],
    c_bool,
)
def mtmd_decode_use_mrope(ctx: mtmd_context_p, /) -> bool:
    """Return True when decoding for this context uses mRoPE."""
    ...


# MTMD_API bool mtmd_support_vision(const mtmd_context * ctx);
@ctypes_function(
    "mtmd_support_vision",
    [mtmd_context_p_ctypes],
    c_bool,
)
def mtmd_support_vision(ctx: mtmd_context_p, /) -> bool:
    """Return True if the currently loaded model supports vision input."""
    ...


# MTMD_API bool mtmd_support_audio(const mtmd_context * ctx);
@ctypes_function(
    "mtmd_support_audio",
    [mtmd_context_p_ctypes],
    c_bool,
)
def mtmd_support_audio(ctx: mtmd_context_p, /) -> bool:
    """Return True if the MTMD context supports audio input."""
    ...


# MTMD_API int mtmd_get_audio_sample_rate(mtmd_context * ctx);
# MTMD_API int mtmd_get_audio_sample_rate(const mtmd_context * ctx);
@ctypes_function("mtmd_get_audio_sample_rate", [mtmd_context_p_ctypes], c_int)
def mtmd_get_audio_sample_rate(ctx: mtmd_context_p, /) -> int:
"""Get the audio sample rate in Hz. Returns -1 if audio is not supported."""
Expand Down Expand Up @@ -418,14 +435,16 @@ def mtmd_image_tokens_get_n_tokens(image_tokens: mtmd_image_tokens_p, /) -> int:
...


# DEPRECATED(MTMD_API size_t mtmd_image_tokens_get_nx(const mtmd_image_tokens * image_tokens),
#     "use mtmd_image_tokens_get_decoder_pos() instead");
@ctypes_function("mtmd_image_tokens_get_nx", [mtmd_image_tokens_p_ctypes], c_size_t)
def mtmd_image_tokens_get_nx(image_tokens: mtmd_image_tokens_p, /) -> int:
    """Get the image token grid width.

    Deprecated upstream (per the DEPRECATED declaration above); prefer
    mtmd_image_tokens_get_decoder_pos() in new code.
    """
    ...


# MTMD_API size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens);
# DEPRECATED(MTMD_API size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens),
# "use mtmd_image_tokens_get_decoder_pos() instead");
@ctypes_function("mtmd_image_tokens_get_ny", [mtmd_image_tokens_p_ctypes], c_size_t)
def mtmd_image_tokens_get_ny(image_tokens: mtmd_image_tokens_p, /) -> int:
"""Get the image token grid height."""
Expand All @@ -450,6 +469,23 @@ def mtmd_image_tokens_get_n_pos(image_tokens: mtmd_image_tokens_p, /) -> int:
...


# MTMD_API struct mtmd_decoder_pos mtmd_image_tokens_get_decoder_pos(
#     const mtmd_image_tokens * image_tokens, llama_pos pos_0, size_t i);
@ctypes_function(
    "mtmd_image_tokens_get_decoder_pos",
    [mtmd_image_tokens_p_ctypes, llama_cpp.llama_pos, c_size_t],
    mtmd_decoder_pos,
)
def mtmd_image_tokens_get_decoder_pos(
    image_tokens: mtmd_image_tokens_p,
    pos_0: llama_cpp.llama_pos,
    i: Union[c_size_t, int],
    /,
) -> mtmd_decoder_pos:
    """Get decoder attention position for an image embedding token.

    Args:
        image_tokens: Image token batch to query.
        pos_0: llama_pos base value — presumably the position of the first
            image token; verify against mtmd.h.
        i: Index of the embedding token to query.

    Returns:
        An mtmd_decoder_pos struct, returned by value from C.
    """
    ...


# MTMD_API int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens);
@ctypes_function(
"mtmd_encode",
Expand Down Expand Up @@ -534,6 +570,23 @@ def mtmd_helper_get_n_pos(chunks: mtmd_input_chunks_p, /) -> int:
...


# MTMD_API void mtmd_helper_image_get_decoder_pos(
#     const mtmd_image_tokens * image, llama_pos pos_0, struct mtmd_decoder_pos * out_pos);
@ctypes_function(
    "mtmd_helper_image_get_decoder_pos",
    [mtmd_image_tokens_p_ctypes, llama_cpp.llama_pos, POINTER(mtmd_decoder_pos)],
    None,
)
def mtmd_helper_image_get_decoder_pos(
    image: mtmd_image_tokens_p,
    pos_0: llama_cpp.llama_pos,
    out_pos: "_Pointer[mtmd_decoder_pos]",
    /,
):
    """Fill decoder attention positions for all image embedding tokens.

    Args:
        image: Image token batch.
        pos_0: llama_pos base value — presumably the position of the first
            image token; verify against mtmd.h.
        out_pos: Caller-allocated mtmd_decoder_pos output buffer that C
            writes into — NOTE(review): confirm the required buffer length
            (likely one entry per embedding token) against mtmd.h.

    Returns None; results are written through *out_pos*.
    """
    ...


# MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
# struct llama_context * lctx,
# const mtmd_input_chunks * chunks,
Expand Down
2 changes: 1 addition & 1 deletion vendor/llama.cpp
Loading