diff --git a/models/z-ai/glm-4.5-air-subscription.yaml b/models/z-ai/glm-4.5-air-subscription.yaml new file mode 100644 index 0000000..39f70c5 --- /dev/null +++ b/models/z-ai/glm-4.5-air-subscription.yaml @@ -0,0 +1,41 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: subscription +model: glm-4.5-air +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.6 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning diff --git a/models/z-ai/glm-4.5-air.yaml b/models/z-ai/glm-4.5-air.yaml new file mode 100644 index 0000000..67bb627 --- /dev/null +++ b/models/z-ai/glm-4.5-air.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.5-air +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.6 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.5-airx.yaml b/models/z-ai/glm-4.5-airx.yaml new file mode 100644 index 0000000..3fa7d81 --- /dev/null +++ b/models/z-ai/glm-4.5-airx.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.5-airx +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.6 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.5-flash.yaml b/models/z-ai/glm-4.5-flash.yaml new file mode 100644 index 0000000..f7a0a0e --- /dev/null +++ b/models/z-ai/glm-4.5-flash.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.5-flash +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.6 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.5-x.yaml b/models/z-ai/glm-4.5-x.yaml new file mode 100644 index 0000000..31a4c27 --- /dev/null +++ b/models/z-ai/glm-4.5-x.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.5-x +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.6 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.5.yaml b/models/z-ai/glm-4.5.yaml new file mode 100644 index 0000000..b93d2ef --- /dev/null +++ b/models/z-ai/glm-4.5.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.5 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.6 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.6.yaml b/models/z-ai/glm-4.6.yaml new file mode 100644 index 0000000..99ff0a0 --- /dev/null +++ b/models/z-ai/glm-4.6.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.6 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.7-flash.yaml b/models/z-ai/glm-4.7-flash.yaml new file mode 100644 index 0000000..5275eef --- /dev/null +++ b/models/z-ai/glm-4.7-flash.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.7-flash +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.7-flashx.yaml b/models/z-ai/glm-4.7-flashx.yaml new file mode 100644 index 0000000..cb473fe --- /dev/null +++ b/models/z-ai/glm-4.7-flashx.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.7-flashx +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-4.7-subscription.yaml b/models/z-ai/glm-4.7-subscription.yaml new file mode 100644 index 0000000..eb5280f --- /dev/null +++ b/models/z-ai/glm-4.7-subscription.yaml @@ -0,0 +1,41 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: subscription +model: glm-4.7 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning diff --git a/models/z-ai/glm-4.7.yaml b/models/z-ai/glm-4.7.yaml new file mode 100644 index 0000000..dc96331 --- /dev/null +++ b/models/z-ai/glm-4.7.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-4.7 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-5-turbo-subscription.yaml b/models/z-ai/glm-5-turbo-subscription.yaml new file mode 100644 index 0000000..7b02ec6 --- /dev/null +++ b/models/z-ai/glm-5-turbo-subscription.yaml @@ -0,0 +1,41 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: subscription +model: glm-5-turbo +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning diff --git a/models/z-ai/glm-5-turbo.yaml b/models/z-ai/glm-5-turbo.yaml new file mode 100644 index 0000000..0e82642 --- /dev/null +++ b/models/z-ai/glm-5-turbo.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-5-turbo +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-5.1-subscription.yaml b/models/z-ai/glm-5.1-subscription.yaml new file mode 100644 index 0000000..cf6b8d5 --- /dev/null +++ b/models/z-ai/glm-5.1-subscription.yaml @@ -0,0 +1,41 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: subscription +model: glm-5.1 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning diff --git a/models/z-ai/glm-5.1.yaml b/models/z-ai/glm-5.1.yaml new file mode 100644 index 0000000..716fee7 --- /dev/null +++ b/models/z-ai/glm-5.1.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-5.1 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/models/z-ai/glm-5.yaml b/models/z-ai/glm-5.yaml new file mode 100644 index 0000000..5788943 --- /dev/null +++ b/models/z-ai/glm-5.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: z-ai +authType: api_key +model: glm-5 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 1 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + applicability: + except: + do_sample: false + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + applicability: + except: + do_sample: false + - path: do_sample + type: boolean + label: Do sample + description: When false, the model uses greedy decoding and ignores temperature and top_p. + default: true + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Toggles the model's extended reasoning before it produces the final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: response_format.type + type: enum + label: Response format + description: Forces the response into plain text or a JSON object. + default: text + values: + - text + - json_object + group: output_format diff --git a/src/client/logos/z-ai.svg b/src/client/logos/z-ai.svg new file mode 100644 index 0000000..87a8b77 --- /dev/null +++ b/src/client/logos/z-ai.svg @@ -0,0 +1,3 @@ + + + diff --git a/src/data/display.ts b/src/data/display.ts index bbf2e55..e1a1edf 100644 --- a/src/data/display.ts +++ b/src/data/display.ts @@ -12,6 +12,7 @@ const PROVIDER_LABELS: Record = { minimax: "MiniMax", cohere: "Cohere", perplexity: "Perplexity", + "z-ai": "Z.ai", moonshot: "Moonshot AI", }; @@ -30,6 +31,18 @@ const MODEL_LABEL_OVERRIDES: Record = { "minimax/minimax-m2.5-highspeed": "MiniMax M2.5 Highspeed", "minimax/minimax-m2.7": "MiniMax M2.7", "minimax/minimax-m2.7-highspeed": "MiniMax M2.7 Highspeed", + "z-ai/glm-5.1": "GLM-5.1", + "z-ai/glm-5": "GLM-5", + "z-ai/glm-5-turbo": "GLM-5-Turbo", + "z-ai/glm-4.7": "GLM-4.7", + "z-ai/glm-4.7-flash": "GLM-4.7-Flash", + "z-ai/glm-4.7-flashx": "GLM-4.7-FlashX", + "z-ai/glm-4.6": "GLM-4.6", + "z-ai/glm-4.5": "GLM-4.5", + "z-ai/glm-4.5-air": "GLM-4.5-Air", + "z-ai/glm-4.5-x": "GLM-4.5-X", + "z-ai/glm-4.5-airx": "GLM-4.5-AirX", + "z-ai/glm-4.5-flash": "GLM-4.5-Flash", "moonshot/kimi-k2.6": "Kimi K2.6", "moonshot/kimi-k2.5": "Kimi K2.5", "moonshot/moonshot-v1-8k": "Moonshot v1 8K",